gallium: Switch one vertex path over to new translate module
author Keith Whitwell <keith@tungstengraphics.com>
Tue, 15 Apr 2008 18:14:31 +0000 (19:14 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Tue, 15 Apr 2008 18:14:31 +0000 (19:14 +0100)
Will eventually do this for all instances where we are converting vertices
from one format to another.

src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
src/gallium/auxiliary/translate/translate.h
src/gallium/auxiliary/translate/translate_generic.c

index 75bbcc8d413bcc0f87f6cd603ad1ccf5db0da79f..2d83e11940fd75b53d5fd791b593c2e22f0bd761 100644 (file)
 #include "draw/draw_vbuf.h"
 #include "draw/draw_vertex.h"
 #include "draw/draw_pt.h"
+#include "translate/translate.h"
+
 
 struct fetch_pipeline_middle_end {
    struct draw_pt_middle_end base;
    struct draw_context *draw;
 
-   const ubyte *input_buf[2];
-
-   struct {
-      const ubyte **input_buf;
-      unsigned input_offset;
-      unsigned output_offset;
-
-      void (*emit)( const float *attrib, void *ptr );
-   } translate[PIPE_MAX_ATTRIBS];
-   unsigned nr_translate;
+   struct translate *translate;   /* built in fetch_pipeline_prepare(); converts pipeline vertices to the hardware vertex layout and carries the output stride in its key */
 
    unsigned pipeline_vertex_size;   /* stride handed to translate->set_buffer() for the pipeline vertex buffer */
-   unsigned hw_vertex_size;
    unsigned prim;   /* primitive type recorded by fetch_pipeline_prepare() */
 };
 
 
-static void emit_NULL( const float *attrib,
-                      void *ptr )
-{
-}
-
-static void emit_R32_FLOAT( const float *attrib,
-                            void *ptr )
-{
-   float *out = (float *)ptr;
-   out[0] = attrib[0];
-}
-
-static void emit_R32G32_FLOAT( const float *attrib,
-                               void *ptr )
-{
-   float *out = (float *)ptr;
-   out[0] = attrib[0];
-   out[1] = attrib[1];
-}
-
-static void emit_R32G32B32_FLOAT( const float *attrib,
-                                  void *ptr )
-{
-   float *out = (float *)ptr;
-   out[0] = attrib[0];
-   out[1] = attrib[1];
-   out[2] = attrib[2];
-}
-
-static void emit_R32G32B32A32_FLOAT( const float *attrib,
-                                     void *ptr )
-{
-   float *out = (float *)ptr;
-   out[0] = attrib[0];
-   out[1] = attrib[1];
-   out[2] = attrib[2];
-   out[3] = attrib[3];
-}
-
-static void
-emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
-{
-   ubyte *out = (ubyte *)ptr;
-   out[2] = float_to_ubyte(attrib[0]);
-   out[1] = float_to_ubyte(attrib[1]);
-   out[0] = float_to_ubyte(attrib[2]);
-   out[3] = float_to_ubyte(attrib[3]);
-}
-
 static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
                                     unsigned prim )
 {
@@ -111,6 +54,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
    boolean ok;
    const struct vertex_info *vinfo;
    unsigned dst_offset;
+   struct translate_key hw_key;
 
    fpme->prim = prim;
 
@@ -132,6 +76,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
    for (i = 0; i < vinfo->num_attribs; i++) {
       unsigned emit_sz = 0;
       unsigned src_buffer = 0;
+      unsigned output_format;
       unsigned src_offset = (sizeof(struct vertex_header) + 
                             vinfo->src_index[i] * 4 * sizeof(float) );
 
@@ -139,49 +84,64 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
          
       switch (vinfo->emit[i]) {
       case EMIT_4F:
-         fpme->translate[i].emit = emit_R32G32B32A32_FLOAT;
+         output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
         emit_sz = 4 * sizeof(float);
          break;
       case EMIT_3F:
-         fpme->translate[i].emit = emit_R32G32B32_FLOAT;
+         output_format = PIPE_FORMAT_R32G32B32_FLOAT;
         emit_sz = 3 * sizeof(float);
          break;
       case EMIT_2F:
-         fpme->translate[i].emit = emit_R32G32_FLOAT;
+         output_format = PIPE_FORMAT_R32G32_FLOAT;
         emit_sz = 2 * sizeof(float);
          break;
       case EMIT_1F:
-         fpme->translate[i].emit = emit_R32_FLOAT;
+         output_format = PIPE_FORMAT_R32_FLOAT;
         emit_sz = 1 * sizeof(float);
          break;
       case EMIT_1F_PSIZE:
-         fpme->translate[i].emit = emit_R32_FLOAT;
+         output_format = PIPE_FORMAT_R32_FLOAT;
         emit_sz = 1 * sizeof(float);
          src_buffer = 1;
         src_offset = 0;
          break;
       case EMIT_4UB:
-         fpme->translate[i].emit = emit_B8G8R8A8_UNORM;
+         output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
         emit_sz = 4 * sizeof(ubyte);
       default:
          assert(0);
-         fpme->translate[i].emit = emit_NULL;
+         output_format = PIPE_FORMAT_NONE;
         emit_sz = 0;
          break;
       }
+      
+      hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      hw_key.element[i].input_buffer = src_buffer;
+      hw_key.element[i].input_offset = src_offset;
+      hw_key.element[i].output_format = output_format;
+      hw_key.element[i].output_offset = dst_offset;
 
-      fpme->translate[i].input_buf = &fpme->input_buf[src_buffer];
-      fpme->translate[i].input_offset = src_offset;
-      fpme->translate[i].output_offset = dst_offset;
       dst_offset += emit_sz;
    }
 
-   fpme->nr_translate = vinfo->num_attribs;
-   fpme->hw_vertex_size = vinfo->size * 4;
+   hw_key.nr_elements = vinfo->num_attribs;
+   hw_key.output_stride = vinfo->size * 4;
+
+   /* Don't bother with caching at this stage:
+    */
+   if (!fpme->translate ||
+       memcmp(&fpme->translate->key, &hw_key, sizeof(hw_key)) != 0) 
+   {
+      if (fpme->translate)
+        fpme->translate->release(fpme->translate);
+
+      fpme->translate = translate_generic_create( &hw_key );
+   }
+
+
 
    //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
    fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION;
-   fpme->hw_vertex_size = vinfo->size * 4;
 }
 
 
@@ -221,46 +181,34 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
                             draw_elts,
                             draw_count );
    } else {
-      unsigned i, j;
+      struct translate *translate = fpme->translate;
       void *hw_verts;
-      char *out_buf;
 
       /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
        */
       draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
       hw_verts = draw->render->allocate_vertices(draw->render,
-                                                 (ushort)fpme->hw_vertex_size,
+                                                 (ushort)fpme->translate->key.output_stride,
                                                  (ushort)fetch_count);
       if (!hw_verts) {
          assert(0);
          return;
       }
 
-      out_buf = (char *)hw_verts;
-      fpme->input_buf[0] = (const ubyte *)pipeline_verts;
-      fpme->input_buf[1] = (const ubyte *)&fpme->draw->rasterizer->point_size;
-
-      for (i = 0; i < fetch_count; i++) {
-
-         for (j = 0; j < fpme->nr_translate; j++) {
+      translate->set_buffer(translate, 
+                           0, 
+                           pipeline_verts,
+                           fpme->pipeline_vertex_size );
 
-            const float *attrib = (const float *)( (*fpme->translate[j].input_buf) + 
-                                                  fpme->translate[j].input_offset );
+      translate->set_buffer(translate, 
+                           1, 
+                           &fpme->draw->rasterizer->point_size,
+                           0);
 
-           char *dest = out_buf + fpme->translate[j].output_offset;
-
-           if (0)
-              debug_printf("emiting [%f, %f, %f, %f]\n",
-                           attrib[0], attrib[1],
-                           attrib[2], attrib[3]);
-
-            fpme->translate[j].emit(attrib, dest);
-         }
-
-        fpme->input_buf[0] += fpme->pipeline_vertex_size;
-        out_buf += fpme->hw_vertex_size;
-      }
+      translate->run( translate,
+                     0, fetch_count,
+                     hw_verts );
 
       draw->render->draw(draw->render,
                          draw_elts,
@@ -268,7 +216,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
 
       draw->render->release_vertices(draw->render,
                                      hw_verts,
-                                     fpme->hw_vertex_size,
+                                     fpme->translate->key.output_stride,
                                      fetch_count);
    }
 
index ac6bc7088bc25bbb8b5f08e5a2517e1ad38c2963..4f9f40e51a1501cf247fcf4dc2e78c8c7007007c 100644 (file)
@@ -54,8 +54,17 @@ struct translate_element
 };
 
 
+struct translate_key {   /* Full description of one vertex conversion.
+                          * fetch_pipeline_prepare() compares keys with
+                          * memcmp() over the whole struct, so entries of
+                          * element[] beyond nr_elements (and any padding)
+                          * take part in that comparison.
+                          * NOTE(review): callers probably need to zero the
+                          * key before filling it in -- confirm.
+                          */
+   unsigned output_stride;   /* bytes from the start of one output vertex to the next */
+   unsigned nr_elements;   /* number of entries of element[] that are in use */
+   struct translate_element element[PIPE_MAX_ATTRIBS];   /* per-attribute input format/buffer/offset and output format/offset */
+};
+
+
 struct translate {
-   void (*destroy)( struct translate * );
+   struct translate_key key;
+
+   void (*release)( struct translate * );
 
    void (*set_buffer)( struct translate *,
                       unsigned i,
@@ -66,17 +75,30 @@ struct translate {
                     const unsigned *elts,
                     unsigned count,
                     void *output_buffer);
+
+   void (*run)( struct translate *,
+               unsigned start,
+               unsigned count,
+               void *output_buffer);
 };
 
 
 
-struct translate *translate_sse2_create( unsigned output_stride,
-                                        const struct translate_element *elements,
-                                        unsigned nr_elements );
+#if 0
+struct translate_context *translate_context_create( void );
+void translate_context_destroy( struct translate_context * );
+
+struct translate *translate_lookup_or_create( struct translate_context *tctx,
+                                             const struct translate_key *key );
+#endif
+
+
+/*******************************************************************************
+ *  Private:
+ */
+struct translate *translate_sse2_create( const struct translate_key *key );
 
-struct translate *translate_generic_create( unsigned output_stride,
-                                           const struct translate_element *elements,
-                                           unsigned nr_elements );
+struct translate *translate_generic_create( const struct translate_key *key );
 
 
 #endif
index 7e75ba8365ef7314284ce3abda9857dce0fa45f4..fc9060900b473634a09ceeefc602120308ed5d47 100644 (file)
@@ -59,7 +59,6 @@ struct translate_generic {
    } attrib[PIPE_MAX_ATTRIBS];
 
    unsigned nr_attrib;
-   unsigned output_stride;
 };
 
 
@@ -571,7 +570,42 @@ static void generic_run_elts( struct translate *translate,
         tg->attrib[attr].emit( data, dst );
       }
       
-      vert += tg->output_stride;
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+static void generic_run( struct translate *translate,
+                        unsigned start,
+                        unsigned count,
+                        void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+   unsigned i;
+
+   /* Convert 'count' consecutive vertices, beginning at input index
+    * 'start', and write them back-to-back into output_buffer.
+    *
+    * The outer loop walks vertices; the inner loop walks the
+    * attributes of the current vertex.  For each attribute the
+    * source address is input_ptr + input_stride * elt, fetch() is
+    * handed that address together with a float[4] scratch array,
+    * and emit() is handed the scratch array together with the
+    * attribute's output_offset inside the current output vertex.
+    * After every vertex the output pointer advances by the key's
+    * output_stride.
+    */
+   for (i = 0; i < count; i++) {
+      unsigned elt = start + i;
+
+      for (attr = 0; attr < nr_attrs; attr++) {
+        float data[4];
+
+        const char *src = (tg->attrib[attr].input_ptr + 
+                           tg->attrib[attr].input_stride * elt);
+
+        char *dst = (vert + 
+                     tg->attrib[attr].output_offset);
+
+        tg->attrib[attr].fetch( src, data );
+        tg->attrib[attr].emit( data, dst );
+      }
+      
+      vert += tg->translate.key.output_stride;   /* step to the next output vertex */
    }
 }
 
@@ -595,14 +629,14 @@ static void generic_set_buffer( struct translate *translate,
 }
 
 
-static void generic_destroy( struct translate *translate )
+static void generic_release( struct translate *translate )
 {
+   /* Release hook installed as translate->release by
+    * translate_generic_create().  There is no reference counting
+    * yet (Refcount? -- still an open question), so the object is
+    * handed to FREE() immediately and must not be used afterwards.
+    */
    FREE(translate);
 }
 
-struct translate *translate_generic_create( unsigned output_stride,
-                                           const struct translate_element *elements,
-                                           unsigned nr_elements )
+struct translate *translate_generic_create( const struct translate_key *key )
 {
    struct translate_generic *tg = CALLOC_STRUCT(translate_generic);
    unsigned i;
@@ -610,20 +644,24 @@ struct translate *translate_generic_create( unsigned output_stride,
    if (tg == NULL)
       return NULL;
 
-   tg->translate.destroy = generic_destroy;
+   tg->translate.key = *key;
+   tg->translate.release = generic_release;
    tg->translate.set_buffer = generic_set_buffer;
    tg->translate.run_elts = generic_run_elts;
+   tg->translate.run = generic_run;
+
+   for (i = 0; i < key->nr_elements; i++) {
+
+      tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format);
+      tg->attrib[i].buffer = key->element[i].input_buffer;
+      tg->attrib[i].input_offset = key->element[i].input_offset;
 
-   for (i = 0; i < nr_elements; i++) {
-      tg->attrib[i].fetch = get_fetch_func(elements[i].input_format);
-      tg->attrib[i].buffer = elements[i].input_buffer;
-      tg->attrib[i].input_offset = elements[i].input_offset;
+      tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+      tg->attrib[i].output_offset = key->element[i].output_offset;
 
-      tg->attrib[i].emit = get_emit_func(elements[i].output_format);
-      tg->attrib[i].output_offset = elements[i].output_offset;
    }
 
-   tg->nr_attrib = nr_elements;
+   tg->nr_attrib = key->nr_elements;
 
 
    return &tg->translate;