Code re-org. Add comments.

author Michal Krol <michal@tungstengraphics.com>

Mon, 29 Oct 2007 12:14:54 +0000 (12:14 +0000)

committer Michal Krol <michal@tungstengraphics.com>

Mon, 29 Oct 2007 13:25:00 +0000 (13:25 +0000)
author Michal Krol <michal@tungstengraphics.com>
Mon, 29 Oct 2007 12:14:54 +0000 (12:14 +0000)
committer Michal Krol <michal@tungstengraphics.com>
Mon, 29 Oct 2007 13:25:00 +0000 (13:25 +0000)
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c

index b8edcf0a2e9035527d05f5f40f000a8c65660c01..abdebd6f97aa0b63e6820338d4675232f9db3c6d 100755 (executable)
--- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
@@ -22,14 +22,9 @@
  
  #define TEMP_R0   TGSI_EXEC_TEMP_R0
  
-static struct x86_reg
-get_argument(
-   unsigned index )
-{
-   return x86_make_disp(
-      x86_make_reg( file_REG32, reg_SP ),
-      (index + 1) * 4 );
-}
+/**
+ * X86 utility functions.
+ */
  
  static struct x86_reg
  make_xmm(
@@ -40,6 +35,10 @@ make_xmm(
        (enum x86_reg_name) xmm );
  }
  
+/**
+ * X86 register mapping helpers.
+ */
+
  static struct x86_reg
  get_const_base( void )
  {
@@ -48,16 +47,6 @@ get_const_base( void )
        reg_CX );
  }
  
-static struct x86_reg
-get_const(
-   unsigned vec,
-   unsigned chan )
-{
-   return x86_make_disp(
-      get_const_base(),
-      (vec * 4 + chan) * 4 );
-}
-
  static struct x86_reg
  get_input_base( void )
  {
@@ -67,55 +56,78 @@ get_input_base( void )
  }
  
  static struct x86_reg
-get_input(
-   unsigned vec,
-   unsigned chan )
+get_output_base( void )
  {
-   return x86_make_disp(
-      get_input_base(),
-      (vec * 4 + chan) * 16 );
+   return x86_make_reg(
+      file_REG32,
+      reg_DX );
  }
  
  static struct x86_reg
-get_output_base( void )
+get_temp_base( void )
  {
     return x86_make_reg(
        file_REG32,
-      reg_DX );
+      reg_BX );
  }
  
  static struct x86_reg
-get_output(
+get_coef_base( void )
+{
+   return get_output_base();
+}
+
+/**
+ * Data access helpers.
+ */
+
+static struct x86_reg
+get_argument(
+   unsigned index )
+{
+   return x86_make_disp(
+      x86_make_reg( file_REG32, reg_SP ),
+      (index + 1) * 4 );
+}
+
+static struct x86_reg
+get_const(
     unsigned vec,
     unsigned chan )
  {
     return x86_make_disp(
-      get_output_base(),
-      (vec * 4 + chan) * 16 );
+      get_const_base(),
+      (vec * 4 + chan) * 4 );
  }
  
  static struct x86_reg
-get_temp_base( void )
+get_input(
+   unsigned vec,
+   unsigned chan )
  {
-   return x86_make_reg(
-      file_REG32,
-      reg_BX );
+   return x86_make_disp(
+      get_input_base(),
+      (vec * 4 + chan) * 16 );
  }
  
  static struct x86_reg
-get_temp(
+get_output(
     unsigned vec,
     unsigned chan )
  {
     return x86_make_disp(
-      get_temp_base(),
+      get_output_base(),
        (vec * 4 + chan) * 16 );
  }
  
  static struct x86_reg
-get_coef_base( void )
+get_temp(
+   unsigned vec,
+   unsigned chan )
  {
-   return get_output_base();
+   return x86_make_disp(
+      get_temp_base(),
+      (vec * 4 + chan) * 16 );
  }
  
  static struct x86_reg
@@ -129,6 +141,10 @@ get_coef(
        ((vec * 3 + member) * 4 + chan) * 4 );
  }
  
+/**
+ * Data fetch helpers.
+ */
+
  static void
  emit_const(
     struct x86_function *func,
@@ -160,19 +176,6 @@ emit_inputf(
        get_input( vec, chan ) );
  }
  
-static void
-emit_inputs(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movups(
-      func,
-      get_input( vec, chan ),
-      make_xmm( xmm ) );
-}
-
  static void
  emit_output(
     struct x86_function *func,
@@ -199,19 +202,6 @@ emit_tempf(
        get_temp( vec, chan ) );
  }
  
-static void
-emit_temps(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   sse_movaps(
-      func,
-      get_temp( vec, chan ),
-      make_xmm( xmm ) );
-}
-
  static void
  emit_coef(
     struct x86_function *func,
@@ -231,49 +221,34 @@ emit_coef(
        SHUF( 0, 0, 0, 0 ) );
  }
  
-static void
-emit_coef_a0(
-   struct x86_function *func,
-   unsigned xmm,
-   unsigned vec,
-   unsigned chan )
-{
-   emit_coef(
-      func,
-      xmm,
-      vec,
-      chan,
-      0 );
-}
+/**
+ * Data store helpers.
+ */
  
  static void
-emit_coef_dadx(
+emit_inputs(
     struct x86_function *func,
     unsigned xmm,
     unsigned vec,
     unsigned chan )
  {
-   emit_coef(
+   sse_movups(
        func,
-      xmm,
-      vec,
-      chan,
-      1 );
+      get_input( vec, chan ),
+      make_xmm( xmm ) );
  }
  
  static void
-emit_coef_dady(
+emit_temps(
     struct x86_function *func,
     unsigned xmm,
     unsigned vec,
     unsigned chan )
  {
-   emit_coef(
+   sse_movaps(
        func,
-      xmm,
-      vec,
-      chan,
-      2 );
+      get_temp( vec, chan ),
+      make_xmm( xmm ) );
  }
  
  static void
@@ -290,57 +265,59 @@ emit_addrs(
        chan );
  }
  
-static void
-emit_abs(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse_andps(
-      func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_7FFFFFFF_I,
-         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
+/**
+ * Coefficient fetch helpers.
+ */
  
  static void
-emit_neg(
+emit_coef_a0(
     struct x86_function *func,
-   unsigned xmm )
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
  {
-   sse_xorps(
+   emit_coef(
        func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
+      xmm,
+      vec,
+      chan,
+      0 );
  }
  
  static void
-emit_setsign(
+emit_coef_dadx(
     struct x86_function *func,
-   unsigned xmm )
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
  {
-   sse_orps(
+   emit_coef(
        func,
-      make_xmm( xmm ),
-      get_temp(
-         TGSI_EXEC_TEMP_80000000_I,
-         TGSI_EXEC_TEMP_80000000_C ) );
+      xmm,
+      vec,
+      chan,
+      1 );
  }
  
  static void
-emit_add(
+emit_coef_dady(
     struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
+   unsigned xmm,
+   unsigned vec,
+   unsigned chan )
  {
-   sse_addps(
+   emit_coef(
        func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
+      xmm,
+      vec,
+      chan,
+      2 );
  }
  
+/**
+ * Function call helpers.
+ */
+
  static void
  emit_push_gp(
     struct x86_function *func )
@@ -433,6 +410,35 @@ emit_func_call_dst_src(
        code );
  }
  
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_andps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_7FFFFFFF_I,
+         TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
+{
+   sse_addps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
  static void XSTDCALL
  cos4f(
     float *store )
@@ -463,114 +469,95 @@ emit_cos(
  }
  
  static void XSTDCALL
-sin4f(
+ex24f(
     float *store )
  {
  #ifdef WIN32
-   store[0] = (float) sin( (double) store[0] );
-   store[1] = (float) sin( (double) store[1] );
-   store[2] = (float) sin( (double) store[2] );
-   store[3] = (float) sin( (double) store[3] );
+   store[0] = (float) pow( 2.0, (double) store[0] );
+   store[1] = (float) pow( 2.0, (double) store[1] );
+   store[2] = (float) pow( 2.0, (double) store[2] );
+   store[3] = (float) pow( 2.0, (double) store[3] );
  #else
     const unsigned X = TEMP_R0 * 16;
-   store[X + 0] = sinf( store[X + 0] );
-   store[X + 1] = sinf( store[X + 1] );
-   store[X + 2] = sinf( store[X + 2] );
-   store[X + 3] = sinf( store[X + 3] );
+   store[X + 0] = powf( 2.0f, store[X + 0] );
+   store[X + 1] = powf( 2.0f, store[X + 1] );
+   store[X + 2] = powf( 2.0f, store[X + 2] );
+   store[X + 3] = powf( 2.0f, store[X + 3] );
  #endif
  }
  
  static void
-emit_sin (struct x86_function *func,
-          unsigned xmm_dst)
+emit_ex2(
+   struct x86_function *func,
+   unsigned xmm_dst )
  {
     emit_func_call_dst(
        func,
        xmm_dst,
-      sin4f );
+      ex24f );
  }
  
  static void
-emit_mov(
+emit_f2it(
     struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
-{
-   sse_movups(
-      func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
-          unsigned xmm_dst,
-          unsigned xmm_src)
+   unsigned xmm )
  {
-   sse_mulps(
+   sse2_cvttps2dq(
        func,
-      make_xmm( xmm_dst ),
-      make_xmm( xmm_src ) );
+      make_xmm( xmm ),
+      make_xmm( xmm ) );
  }
  
  static void XSTDCALL
-pow4f(
+flr4f(
     float *store )
  {
  #ifdef WIN32
-   store[0] = (float) pow( (double) store[0], (double) store[4] );
-   store[1] = (float) pow( (double) store[1], (double) store[5] );
-   store[2] = (float) pow( (double) store[2], (double) store[6] );
-   store[3] = (float) pow( (double) store[3], (double) store[7] );
+   const unsigned X = 0;
  #else
     const unsigned X = TEMP_R0 * 16;
-   store[X + 0] = powf( store[X + 0], store[X + 4] );
-   store[X + 1] = powf( store[X + 1], store[X + 5] );
-   store[X + 2] = powf( store[X + 2], store[X + 6] );
-   store[X + 3] = powf( store[X + 3], store[X + 7] );
  #endif
+   store[X + 0] = (float) floor( (double) store[X + 0] );
+   store[X + 1] = (float) floor( (double) store[X + 1] );
+   store[X + 2] = (float) floor( (double) store[X + 2] );
+   store[X + 3] = (float) floor( (double) store[X + 3] );
  }
  
  static void
-emit_pow(
+emit_flr(
     struct x86_function *func,
-   unsigned xmm_dst,
-   unsigned xmm_src )
+   unsigned xmm_dst )
  {
-   emit_func_call_dst_src(
+   emit_func_call_dst(
        func,
        xmm_dst,
-      xmm_src,
-      pow4f );
+      flr4f );
  }
  
  static void XSTDCALL
-ex24f(
+frc4f(
     float *store )
  {
  #ifdef WIN32
-   store[0] = (float) pow( 2.0, (double) store[0] );
-   store[1] = (float) pow( 2.0, (double) store[1] );
-   store[2] = (float) pow( 2.0, (double) store[2] );
-   store[3] = (float) pow( 2.0, (double) store[3] );
+   const unsigned X = 0;
  #else
     const unsigned X = TEMP_R0 * 16;
-   store[X + 0] = powf( 2.0f, store[X + 0] );
-   store[X + 1] = powf( 2.0f, store[X + 1] );
-   store[X + 2] = powf( 2.0f, store[X + 2] );
-   store[X + 3] = powf( 2.0f, store[X + 3] );
  #endif
+   store[X + 0] -= (float) floor( (double) store[X + 0] );
+   store[X + 1] -= (float) floor( (double) store[X + 1] );
+   store[X + 2] -= (float) floor( (double) store[X + 2] );
+   store[X + 3] -= (float) floor( (double) store[X + 3] );
  }
  
  static void
-emit_ex2(
+emit_frc(
     struct x86_function *func,
     unsigned xmm_dst )
  {
     emit_func_call_dst(
        func,
        xmm_dst,
-      ex24f );
+      frc4f );
  }
  
  static void XSTDCALL
@@ -599,56 +586,71 @@ emit_lg2(
        lg24f );
  }
  
-static void XSTDCALL
-flr4f(
-   float *store )
+static void
+emit_mov(
+   struct x86_function *func,
+   unsigned xmm_dst,
+   unsigned xmm_src )
  {
-#ifdef WIN32
-   const unsigned X = 0;
-#else
-   const unsigned X = TEMP_R0 * 16;
-#endif
-   store[X + 0] = (float) floor( (double) store[X + 0] );
-   store[X + 1] = (float) floor( (double) store[X + 1] );
-   store[X + 2] = (float) floor( (double) store[X + 2] );
-   store[X + 3] = (float) floor( (double) store[X + 3] );
+   sse_movups(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
  }
  
  static void
-emit_flr(
+emit_mul (struct x86_function *func,
+          unsigned xmm_dst,
+          unsigned xmm_src)
+{
+   sse_mulps(
+      func,
+      make_xmm( xmm_dst ),
+      make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
     struct x86_function *func,
-   unsigned xmm_dst )
+   unsigned xmm )
  {
-   emit_func_call_dst(
+   sse_xorps(
        func,
-      xmm_dst,
-      flr4f );
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
  }
  
  static void XSTDCALL
-frc4f(
+pow4f(
     float *store )
  {
  #ifdef WIN32
-   const unsigned X = 0;
+   store[0] = (float) pow( (double) store[0], (double) store[4] );
+   store[1] = (float) pow( (double) store[1], (double) store[5] );
+   store[2] = (float) pow( (double) store[2], (double) store[6] );
+   store[3] = (float) pow( (double) store[3], (double) store[7] );
  #else
     const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = powf( store[X + 0], store[X + 4] );
+   store[X + 1] = powf( store[X + 1], store[X + 5] );
+   store[X + 2] = powf( store[X + 2], store[X + 6] );
+   store[X + 3] = powf( store[X + 3], store[X + 7] );
  #endif
-   store[X + 0] -= (float) floor( (double) store[X + 0] );
-   store[X + 1] -= (float) floor( (double) store[X + 1] );
-   store[X + 2] -= (float) floor( (double) store[X + 2] );
-   store[X + 3] -= (float) floor( (double) store[X + 3] );
  }
  
  static void
-emit_frc(
+emit_pow(
     struct x86_function *func,
-   unsigned xmm_dst )
+   unsigned xmm_dst,
+   unsigned xmm_src )
  {
-   emit_func_call_dst(
+   emit_func_call_dst_src(
        func,
        xmm_dst,
-      frc4f );
+      xmm_src,
+      pow4f );
  }
  
  static void
@@ -675,6 +677,47 @@ emit_rsqrt(
        make_xmm( xmm_src ) );
  }
  
+static void
+emit_setsign(
+   struct x86_function *func,
+   unsigned xmm )
+{
+   sse_orps(
+      func,
+      make_xmm( xmm ),
+      get_temp(
+         TGSI_EXEC_TEMP_80000000_I,
+         TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+sin4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = (float) sin( (double) store[0] );
+   store[1] = (float) sin( (double) store[1] );
+   store[2] = (float) sin( (double) store[2] );
+   store[3] = (float) sin( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = sinf( store[X + 0] );
+   store[X + 1] = sinf( store[X + 1] );
+   store[X + 2] = sinf( store[X + 2] );
+   store[X + 3] = sinf( store[X + 3] );
+#endif
+}
+
+static void
+emit_sin (struct x86_function *func,
+          unsigned xmm_dst)
+{
+   emit_func_call_dst(
+      func,
+      xmm_dst,
+      sin4f );
+}
+
  static void
  emit_sub(
     struct x86_function *func,
@@ -687,6 +730,10 @@ emit_sub(
        make_xmm( xmm_src ) );
  }
  
+/**
+ * Register fetch.
+ */
+
  static void
  emit_fetch(
     struct x86_function *func,
@@ -769,6 +816,13 @@ emit_fetch(
     }
  }
  
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store.
+ */
+
  static void
  emit_store(
     struct x86_function *func,
@@ -820,6 +874,13 @@ emit_store(
     }
  }
  
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+/**
+ * High-level instruction translators.
+ */
+
  static void
  emit_kil(
     struct x86_function *func,
@@ -915,12 +976,6 @@ emit_kil(
        x86_make_reg( file_REG32, reg_AX ) );
  }
  
-#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
-
-#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
  static void
  emit_setcc(
     struct x86_function *func,
@@ -981,17 +1036,6 @@ emit_cmp(
     }
  }
  
-static void
-emit_f2it(
-   struct x86_function *func,
-   unsigned xmm )
-{
-   sse2_cvttps2dq(
-      func,
-      make_xmm( xmm ),
-      make_xmm( xmm ) );
-}
-
  static void
  emit_instruction(
     struct x86_function *func,
author	Michal Krol <michal@tungstengraphics.com>
	Mon, 29 Oct 2007 12:14:54 +0000 (12:14 +0000)
committer	Michal Krol <michal@tungstengraphics.com>
	Mon, 29 Oct 2007 13:25:00 +0000 (13:25 +0000)