#define DRAW_DBG 0
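+/**
+ * Shuffle patterns shared by the fetch/convert routines below:
+ * index 0 is used by CVT_64_FLOAT, index 1 by the 8-bit scaled
+ * conversions, index 2 by the 16-bit scaled conversions, and
+ * indices 3 and 4 by trans4x4.
+ */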
+static const qword fetch_shuffle_data[] = {
+ /* Shuffle used by CVT_64_FLOAT
+ */
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ },
+
+ /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
+ */
+ {
+ 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
+ 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
+ },
+
+ /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
+ */
+ {
+ 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
+ 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
+ },
+
+ /* High-word shuffle used by trans4x4: interleaves words 0/1 of its operands.
+ */
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
+ },
+
+ /* Low-word shuffle used by trans4x4: interleaves words 2/3 of its operands.
+ */
+ {
+ 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
+ }
+};
+
+
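+/* Transpose a 4x4 matrix of floats held in four quadwords.  With rows
+ * (a0 a1 a2 a3) .. (d0 d1 d2 d3), the first round of shuffles produces
+ * (a0 c0 a1 c1), (a2 c2 a3 c3), (b0 d0 b1 d1) and (b2 d2 b3 d3); the
+ * second round interleaves those pairs into the columns (a0 b0 c0 d0),
+ * (a1 b1 c1 d1), (a2 b2 c2 d2) and (a3 b3 c3 d3).
+ */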
+static INLINE void
+trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out,
+ const qword *shuffle)
+{
+ qword t1 = si_shufb(row0, row2, shuffle[3]);
+ qword t2 = si_shufb(row0, row2, shuffle[4]);
+ qword t3 = si_shufb(row1, row3, shuffle[3]);
+ qword t4 = si_shufb(row1, row3, shuffle[4]);
+
+ out[0] = si_shufb(t1, t3, shuffle[3]);
+ out[1] = si_shufb(t1, t3, shuffle[4]);
+ out[2] = si_shufb(t2, t4, shuffle[3]);
+ out[3] = si_shufb(t2, t4, shuffle[4]);
+}
-static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 };
/**
* Fetch between 1 and 32 bytes from an unaligned address
}
-#define CVT_32_FLOAT(q) (*(q))
+#define CVT_32_FLOAT(q, s) (*(q))
static INLINE qword
-CVT_64_FLOAT(const qword *qw)
+CVT_64_FLOAT(const qword *qw, const qword *shuffle)
{
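+   /* si_frds rounds each double to a single-precision value in the first
+    * word of its doubleword; rotating the source by 8 bytes exposes the
+    * second double.  The shuffle pattern (and its 8-byte rotation) packs
+    * the four floats into one quadword, zeroing the unused bytes so the
+    * two halves can simply be OR'd together.
+    */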
- qword shuf_first = (qword) {
- 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
- };
-
qword a = si_frds(qw[0]);
qword b = si_frds(si_rotqbyi(qw[0], 8));
qword c = si_frds(qw[1]);
qword d = si_frds(si_rotqbyi(qw[1], 8));
- qword ab = si_shufb(a, b, shuf_first);
- qword cd = si_shufb(c, d, si_rotqbyi(shuf_first, 8));
+ qword ab = si_shufb(a, b, shuffle[0]);
+ qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8));
return si_or(ab, cd);
}
static INLINE qword
-CVT_8_USCALED(const qword *qw)
+CVT_8_USCALED(const qword *qw, const qword *shuffle)
{
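+   /* shuffle[1] spreads the four packed bytes into separate zero-extended
+    * words; si_cuflt then converts each unsigned value to float.
+    */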
- qword shuffle = (qword) {
- 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
- 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
- };
-
- return si_cuflt(si_shufb(*qw, *qw, shuffle), 0);
+ return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0);
}
static INLINE qword
-CVT_16_USCALED(const qword *qw)
+CVT_16_USCALED(const qword *qw, const qword *shuffle)
{
- qword shuffle = (qword) {
- 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
- 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
- };
-
- return si_cuflt(si_shufb(*qw, *qw, shuffle), 0);
+ return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0);
}
static INLINE qword
-CVT_32_USCALED(const qword *qw)
+CVT_32_USCALED(const qword *qw, const qword *shuffle)
{
+ (void) shuffle;
return si_cuflt(*qw, 0);
}
static INLINE qword
-CVT_8_SSCALED(const qword *qw)
+CVT_8_SSCALED(const qword *qw, const qword *shuffle)
{
- qword shuffle = (qword) {
- 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
- 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
- };
-
- return si_csflt(si_shufb(*qw, *qw, shuffle), 0);
+ return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0);
}
static INLINE qword
-CVT_16_SSCALED(const qword *qw)
+CVT_16_SSCALED(const qword *qw, const qword *shuffle)
{
- qword shuffle = (qword) {
- 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
- 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
- };
-
- return si_csflt(si_shufb(*qw, *qw, shuffle), 0);
+ return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0);
}
static INLINE qword
-CVT_32_SSCALED(const qword *qw)
+CVT_32_SSCALED(const qword *qw, const qword *shuffle)
{
+ (void) shuffle;
return si_csflt(*qw, 0);
}
static INLINE qword
-CVT_8_UNORM(const qword *qw)
+CVT_8_UNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 255.0f);
- return si_fm(CVT_8_USCALED(qw), scale);
+ return si_fm(CVT_8_USCALED(qw, shuffle), scale);
}
static INLINE qword
-CVT_16_UNORM(const qword *qw)
+CVT_16_UNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 65535.0f);
- return si_fm(CVT_16_USCALED(qw), scale);
+ return si_fm(CVT_16_USCALED(qw, shuffle), scale);
}
static INLINE qword
-CVT_32_UNORM(const qword *qw)
+CVT_32_UNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 4294967295.0f);
- return si_fm(CVT_32_USCALED(qw), scale);
+ return si_fm(CVT_32_USCALED(qw, shuffle), scale);
}
static INLINE qword
-CVT_8_SNORM(const qword *qw)
+CVT_8_SNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 127.0f);
- return si_fm(CVT_8_SSCALED(qw), scale);
+ return si_fm(CVT_8_SSCALED(qw, shuffle), scale);
}
static INLINE qword
-CVT_16_SNORM(const qword *qw)
+CVT_16_SNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 32767.0f);
- return si_fm(CVT_16_SSCALED(qw), scale);
+ return si_fm(CVT_16_SSCALED(qw, shuffle), scale);
}
static INLINE qword
-CVT_32_SNORM(const qword *qw)
+CVT_32_SNORM(const qword *qw, const qword *shuffle)
{
const qword scale = (qword) spu_splats(1.0f / 2147483647.0f);
- return si_fm(CVT_32_SSCALED(qw), scale);
+ return si_fm(CVT_32_SSCALED(qw, shuffle), scale);
}
#define SZ_4 si_il(0U)
-#define SZ_3 si_rotqmbyi(si_il(~0), -12)
-#define SZ_2 si_rotqmbyi(si_il(~0), -8)
-#define SZ_1 si_rotqmbyi(si_il(~0), -4)
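+/* Component-count masks for si_selb: a set bit selects the byte from the
+ * defaults vector.  si_fsmbi expands each bit of its 16-bit immediate into
+ * a whole byte, so 0x000f covers only the last word (w), 0x00ff the last
+ * two words (z, w) and 0x0fff the last three (y, z, w).
+ */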
+#define SZ_3 si_fsmbi(0x000f)
+#define SZ_2 si_fsmbi(0x00ff)
+#define SZ_1 si_fsmbi(0x0fff)
/**
* Fetch a float[4] vertex attribute from memory, doing format/type
*/
#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \
static void \
-fetch_##NAME(qword *out, const qword *in) \
+fetch_##NAME(qword *out, const qword *in, qword defaults, \
+ const qword *shuffle) \
{ \
qword tmp[4]; \
\
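+   /* Convert each of the four input vertices to floats, substituting the \
+    * defaults for any components beyond the attribute's size, then       \
+    * transpose the result from one-vertex-per-qword (AoS) to             \
+    * one-component-per-qword (SoA).                                      \
+    */                                                                    \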
- tmp[0] = si_selb(CVT(in + (0 * N)), (qword) defaults, SZ); \
- tmp[1] = si_selb(CVT(in + (1 * N)), (qword) defaults, SZ); \
- tmp[2] = si_selb(CVT(in + (2 * N)), (qword) defaults, SZ); \
- tmp[3] = si_selb(CVT(in + (3 * N)), (qword) defaults, SZ); \
- _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) tmp); \
+ tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \
+ tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \
+ tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \
+ tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \
+ trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \
}
+
FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 )
FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 )
FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 )
/* loop over vertex attributes (vertex shader inputs)
*/
for (attr = 0; attr < nr_attrs; attr++) {
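+      /* Components missing from the fetched attribute default to
+       * (0, 0, 0, 1).
+       */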
+ const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 };
const unsigned pitch = draw->vertex_fetch.pitch[attr];
const uint64_t src = draw->vertex_fetch.src_ptr[attr];
const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr];
printf("SPU: fetching = 0x%llx\n", addr);
#endif
- fetch_unaligned(& in[idx], addr, bytes_per_entry);
- idx += quads_per_entry;
+ fetch_unaligned(& in[idx], addr, bytes_per_entry);
+ idx += quads_per_entry;
}
/* Be nice and zero out any missing vertices.
/* Convert all 4 vertices to vectors of float.
*/
- (*fetch)(&machine->Inputs[attr].xyzw[0].q, in);
+ (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values,
+ fetch_shuffle_data);
}
}