gallium: use new util_fast_exp2(), util_fast_log2(), util_fast_pow() functions

author Brian Paul <brian.paul@tungstengraphics.com>
Fri, 22 Aug 2008 21:16:43 +0000 (15:16 -0600)
committer Brian Paul <brian.paul@tungstengraphics.com>
Fri, 22 Aug 2008 21:16:43 +0000 (15:16 -0600)

diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c

index 441877d46f0e0850ae1b3b47720e38c5370191da..41bdd012d56051fdb06f5b81660531d3fc71fd73 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -31,6 +31,7 @@
  
  #include "pipe/p_util.h"
  #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_util.h"
  #include "tgsi/tgsi_exec.h"
@@ -43,6 +44,7 @@
  
  #ifdef PIPE_ARCH_X86
  #define DISASSEM 0
+#define FAST_MATH 1
  
  static const char *files[] =
  {
@@ -1380,14 +1382,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
     return TRUE;
  }
  
+
+
  /* A wrapper for powf().
   * Makes sure it is cdecl and operates on floats.
   */
  static float PIPE_CDECL _powerf( float x, float y )
  {
+#if FAST_MATH
+   return util_fast_pow(x, y);
+#else
     return powf( x, y );
+#endif
  }
  
+#if FAST_MATH
+static float PIPE_CDECL _exp2(float x)
+{
+   return util_fast_exp2(x);
+}
+#endif
+
+
  /* Really not sufficient -- need to check for conditions that could
   * generate inf/nan values, which will slow things down hugely.
   */
@@ -1442,6 +1458,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
  }
  
  
+#if FAST_MATH
+static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
+{
+   uint i;
+
+   /* For absolute correctness, need to spill/invalidate all XMM regs
+    * too.  
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].dirty) 
+         spill(cp, i);
+      aos_release_xmm_reg(cp, i);
+   }
+
+   /* Push caller-save (ie scratch) regs.  
+    */
+   x86_cdecl_caller_push_regs( cp->func );
+
+   x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
+
+   x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+   x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
+
+   /* tmp_EAX has been pushed & will be restored below */
+   x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 );
+   x86_call( cp->func, cp->tmp_EAX );
+
+   x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) );
+
+   x86_cdecl_caller_pop_regs( cp->func );
+
+   /* Note retval on x87 stack:
+    */
+   cp->func->x87_stack++;
+
+   x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+
+   return TRUE;
+}
+#endif
+
+
  static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
  {
     struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
@@ -1662,7 +1720,9 @@ emit_instruction( struct aos_compilation *cp,
        return emit_RND(cp, inst);
  
     case TGSI_OPCODE_EXPBASE2:
-#if 0
+#if FAST_MATH
+      return emit_EXPBASE2(cp, inst);
+#elif 0
        /* this seems to fail for "larger" exponents.
         * See glean tvertProg1's EX2 test.
         */
@@ -1827,6 +1887,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
     struct aos_compilation cp;
     unsigned fixup, label;
  
+   util_init_math();
+
     tgsi_parse_init( &parse, varient->base.vs->state.tokens );
  
     memset(&cp, 0, sizeof(cp));
@@ -2135,4 +2197,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
  
  
  
-#endif
+#endif /* PIPE_ARCH_X86 */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c

index 88a34a69613d7045c8b01a2c265209e544dfdce7..e28b56c842fc63953e1502d5a18e482d074c584d 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -57,6 +57,9 @@
  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_util.h"
  #include "tgsi_exec.h"
+#include "util/u_math.h"
+
+#define FAST_MATH 1
  
  #define TILE_TOP_LEFT     0
  #define TILE_TOP_RIGHT    1
@@ -145,6 +148,8 @@ tgsi_exec_machine_bind_shader(
     tgsi_dump(tokens, 0);
  #endif
  
+   util_init_math();
+
     mach->Tokens = tokens;
     mach->Samplers = samplers;
  
@@ -448,10 +453,17 @@ micro_exp2(
     union tgsi_exec_channel *dst,
     const union tgsi_exec_channel *src)
  {
+#if FAST_MATH
+   dst->f[0] = util_fast_exp2( src->f[0] );
+   dst->f[1] = util_fast_exp2( src->f[1] );
+   dst->f[2] = util_fast_exp2( src->f[2] );
+   dst->f[3] = util_fast_exp2( src->f[3] );
+#else
     dst->f[0] = powf( 2.0f, src->f[0] );
     dst->f[1] = powf( 2.0f, src->f[1] );
     dst->f[2] = powf( 2.0f, src->f[2] );
     dst->f[3] = powf( 2.0f, src->f[3] );
+#endif
  }
  
  static void
@@ -528,10 +540,17 @@ micro_lg2(
     union tgsi_exec_channel *dst,
     const union tgsi_exec_channel *src )
  {
+#if FAST_MATH
+   dst->f[0] = util_fast_log2( src->f[0] );
+   dst->f[1] = util_fast_log2( src->f[1] );
+   dst->f[2] = util_fast_log2( src->f[2] );
+   dst->f[3] = util_fast_log2( src->f[3] );
+#else
     dst->f[0] = logf( src->f[0] ) * 1.442695f;
     dst->f[1] = logf( src->f[1] ) * 1.442695f;
     dst->f[2] = logf( src->f[2] ) * 1.442695f;
     dst->f[3] = logf( src->f[3] ) * 1.442695f;
+#endif
  }
  
  static void
@@ -796,10 +815,17 @@ micro_pow(
     const union tgsi_exec_channel *src0,
     const union tgsi_exec_channel *src1 )
  {
+#if FAST_MATH
+   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
+   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
+   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
+   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
+#else
     dst->f[0] = powf( src0->f[0], src1->f[0] );
     dst->f[1] = powf( src0->f[1], src1->f[1] );
     dst->f[2] = powf( src0->f[2], src1->f[2] );
     dst->f[3] = powf( src0->f[3], src1->f[3] );
+#endif
  }
  
  static void
@@ -2024,7 +2050,11 @@ exec_instruction(
      /* TGSI_OPCODE_EX2 */
        FETCH(&r[0], 0, CHAN_X);
  
+#if FAST_MATH
+      micro_exp2( &r[0], &r[0] );
+#else
        micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+#endif
  
        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( &r[0], 0, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c

index 485e5a0e6f571d528d110831dd4e1f37e0a897ce..e390607023712b34a613fe848829a9cf99777ac4 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -27,6 +27,7 @@
  
  #include "pipe/p_util.h"
  #include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_util.h"
  #include "tgsi_exec.h"
@@ -42,6 +43,8 @@
   */
  #define HIGH_PRECISION 1
  
+#define FAST_MATH 1
+
  
  #define FOR_EACH_CHANNEL( CHAN )\
     for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
@@ -623,10 +626,17 @@ ex24f(
  {
     const unsigned X = 0;
  
+#if FAST_MATH
+   store[X + 0] = util_fast_exp2( store[X + 0] );
+   store[X + 1] = util_fast_exp2( store[X + 1] );
+   store[X + 2] = util_fast_exp2( store[X + 2] );
+   store[X + 3] = util_fast_exp2( store[X + 3] );
+#else
     store[X + 0] = powf( 2.0f, store[X + 0] );
     store[X + 1] = powf( 2.0f, store[X + 1] );
     store[X + 2] = powf( 2.0f, store[X + 2] );
     store[X + 3] = powf( 2.0f, store[X + 3] );
+#endif
  }
  
  static void
@@ -762,10 +772,17 @@ pow4f(
  {
     const unsigned X = 0;
  
+#if FAST_MATH
+   store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] );
+   store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] );
+   store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] );
+   store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] );
+#else
     store[X + 0] = powf( store[X + 0], store[X + 4] );
     store[X + 1] = powf( store[X + 1], store[X + 5] );
     store[X + 2] = powf( store[X + 2], store[X + 6] );
     store[X + 3] = powf( store[X + 3], store[X + 7] );
+#endif
  }
  
  static void
@@ -2235,6 +2252,8 @@ tgsi_emit_sse2(
     unsigned ok = 1;
     uint num_immediates = 0;
  
+   util_init_math();
+
     func->csr = func->store;
  
     tgsi_parse_init( &parse, tokens );
author	Brian Paul <brian.paul@tungstengraphics.com>
	Fri, 22 Aug 2008 21:16:43 +0000 (15:16 -0600)
committer	Brian Paul <brian.paul@tungstengraphics.com>
	Fri, 22 Aug 2008 21:16:43 +0000 (15:16 -0600)
src/gallium/auxiliary/draw/draw_vs_aos.c		patch \| blob \| history
src/gallium/auxiliary/tgsi/tgsi_exec.c		patch \| blob \| history
src/gallium/auxiliary/tgsi/tgsi_sse2.c		patch \| blob \| history