draw: support psize in vs_varient paths
author Keith Whitwell <keith@tungstengraphics.com>
Tue, 27 May 2008 18:01:57 +0000 (19:01 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Tue, 27 May 2008 18:05:16 +0000 (19:05 +0100)
Preserve the vinfo "EMIT_*" format descriptors in the varient key,
and deal with PSIZE directly in each implementation.

src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
src/gallium/auxiliary/draw/draw_vertex.h
src/gallium/auxiliary/draw/draw_vs_aos.c
src/gallium/auxiliary/draw/draw_vs_aos.h
src/gallium/auxiliary/draw/draw_vs_aos_io.c
src/gallium/auxiliary/draw/draw_vs_varient.c

index 85d0bdfcab0752557065554526a0e04ebbe6fffe..729c7db99995fefd4cb06d253acf395fddab5a12 100644 (file)
@@ -77,7 +77,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
    unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
    const struct vertex_info *vinfo;
    unsigned i;
-   boolean need_psize = 0;
    
 
    if (!draw->render->set_primitive( draw->render, 
@@ -123,34 +122,24 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
 
       for (i = 0; i < vinfo->num_attribs; i++) {
          unsigned emit_sz = 0;
-         unsigned output_format = PIPE_FORMAT_NONE;
-         unsigned vs_output = vinfo->src_index[i];
 
          switch (vinfo->emit[i]) {
          case EMIT_4F:
-            output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
             emit_sz = 4 * sizeof(float);
             break;
          case EMIT_3F:
-            output_format = PIPE_FORMAT_R32G32B32_FLOAT;
             emit_sz = 3 * sizeof(float);
             break;
          case EMIT_2F:
-            output_format = PIPE_FORMAT_R32G32_FLOAT;
             emit_sz = 2 * sizeof(float);
             break;
          case EMIT_1F:
-            output_format = PIPE_FORMAT_R32_FLOAT;
             emit_sz = 1 * sizeof(float);
             break;
          case EMIT_1F_PSIZE:
-            need_psize = 1;
-            output_format = PIPE_FORMAT_R32_FLOAT;
             emit_sz = 1 * sizeof(float);
-            vs_output = vinfo->num_attribs + 1;
             break;
          case EMIT_4UB:
-            output_format = PIPE_FORMAT_B8G8R8A8_UNORM;
             emit_sz = 4 * sizeof(ubyte);
             break;
          default:
@@ -162,33 +151,15 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
           * numbers, not to positions in the hw vertex description --
           * that's handled by the output_offset field.
           */
-         fse->key.element[vs_output].out.format = output_format;
-         fse->key.element[vs_output].out.vs_output = vs_output;
-         fse->key.element[vs_output].out.offset = dst_offset;
+         fse->key.element[i].out.format = vinfo->emit[i];
+         fse->key.element[i].out.vs_output = vinfo->src_index[i];
+         fse->key.element[i].out.offset = dst_offset;
       
          dst_offset += emit_sz;
          assert(fse->key.output_stride >= dst_offset);
       }
    }
 
-   /* To make psize work, really need to tell the vertex shader to
-    * copy that value from input->output.  For 'translate' this was
-    * implicit for all elements.
-    */
-#if 0
-   if (need_psize) {
-      unsigned input = num_vs_inputs + 1;
-      const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
-      fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
-      fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
-      fse->key.element[i].input_offset = 0; 
-
-      fse->key.nr_inputs += 1;
-      fse->key.nr_elements = MAX2(fse->key.nr_inputs,
-                                  fse->key.nr_outputs);
-      
-   }
-#endif
 
    /* Would normally look up a vertex shader and peruse its list of
     * varients somehow.  We omitted that step and put all the
index 6d8bac513846c31366cfc4820d70bb23f3b73205..16c65c43175b00f07b6468be6fb1cc425214f34b 100644 (file)
@@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo);
 void draw_dump_emitted_vertex(const struct vertex_info *vinfo, 
                               const uint8_t *data);
 
+
+static INLINE unsigned draw_translate_vinfo_format(unsigned format )
+{
+   switch (format) {
+   case EMIT_1F:
+   case EMIT_1F_PSIZE:
+      return PIPE_FORMAT_R32_FLOAT;
+   case EMIT_2F:
+      return PIPE_FORMAT_R32G32_FLOAT;
+   case EMIT_3F:
+      return PIPE_FORMAT_R32G32B32_FLOAT;
+   case EMIT_4F:
+      return PIPE_FORMAT_R32G32B32A32_FLOAT;
+   case EMIT_4UB:
+      return PIPE_FORMAT_R8G8B8A8_UNORM;
+   default:
+      return PIPE_FORMAT_NONE;
+   }
+}
+
+
 #endif /* DRAW_VERTEX_H */
index 1c63677e6ecec0cb63037c955c5e9f8e943f6fff..d3770b2c53ba123f9000bc5974f66de1d2db6326 100644 (file)
@@ -2126,6 +2126,7 @@ static void vaos_run_elts( struct draw_vs_varient *varient,
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
+   vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
    vaos->gen_run_elts( varient,
                        elts,
                        count,
@@ -2139,6 +2140,7 @@ static void vaos_run_linear( struct draw_vs_varient *varient,
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
+   vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
    vaos->gen_run_linear( varient,
                          start,
                          count,
@@ -2204,6 +2206,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    vaos->base.run_linear = vaos_run_linear;
    vaos->base.run_elts = vaos_run_elts;
 
+   vaos->draw = vs->draw;
    vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
    if (!vaos->machine)
       goto fail;
index fffe2e4658c8139df7b6c0c920e53c9ac4b5a76a..b47413ff43c46e784984e13f6b267c2c23ca2e2f 100644 (file)
@@ -176,6 +176,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp );
 #define IMM_255      4              /* 255, 255, 255, 255 */
 #define IMM_NEGS     5              /* -1,-1,-1,-1 */
 #define IMM_RSQ      6              /* -.5,1.5,_,_ */
+#define IMM_PSIZE    7              /* not really an immediate - updated each run */
 
 struct x86_reg aos_get_internal( struct aos_compilation *cp,
                                  unsigned imm );
index cebfaf6474de902b539e6e96b8b1ba9fe2d3ced8..836110f382d72396f00d8367b7fecd4103daa137 100644 (file)
@@ -33,6 +33,7 @@
 #include "tgsi/exec/tgsi_exec.h"
 #include "draw_vs.h"
 #include "draw_vs_aos.h"
+#include "draw_vertex.h"
 
 #include "rtasm/rtasm_x86sse.h"
 
@@ -249,24 +250,27 @@ static boolean emit_output( struct aos_compilation *cp,
                             unsigned format )
 {
    switch (format) {
-   case PIPE_FORMAT_R32_FLOAT:
+   case EMIT_1F:
+   case EMIT_1F_PSIZE:
       emit_store_R32(cp, ptr, dataXMM);
       break;
-   case PIPE_FORMAT_R32G32_FLOAT:
+   case EMIT_2F:
       emit_store_R32G32(cp, ptr, dataXMM);
       break;
-   case PIPE_FORMAT_R32G32B32_FLOAT:
+   case EMIT_3F:
       emit_store_R32G32B32(cp, ptr, dataXMM);
       break;
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+   case EMIT_4F:
       emit_store_R32G32B32A32(cp, ptr, dataXMM);
       break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
-      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
-      break;
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+   case EMIT_4UB:
+      if (1) {
+         emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+         emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+      }
+      else {
+         emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+      }
       break;
    default:
       ERROR(cp, "unhandled output format");
@@ -287,9 +291,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp )
       unsigned offset = cp->vaos->base.key.element[i].out.offset;
       unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
 
-      struct x86_reg data = aos_get_shader_reg( cp, 
-                                                TGSI_FILE_OUTPUT,
-                                                vs_output );
+      struct x86_reg data;
+
+      if (format == EMIT_1F_PSIZE) {
+         data = aos_get_internal_xmm( cp, IMM_PSIZE );
+      }
+      else {
+         data = aos_get_shader_reg( cp, 
+                                    TGSI_FILE_OUTPUT,
+                                    vs_output );
+      }
 
       if (data.file != file_XMM) {
          struct x86_reg tmp = aos_get_xmm_reg( cp );
index dab46e8eed3173c4e18752b5f5f181bee95c2fc4..119a3a04b566f94845eaa035f454da05b3de0899 100644 (file)
@@ -231,6 +231,10 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
                               output_buffer,
                               vsvg->base.key.output_stride );
 
+      vsvg->emit->set_buffer( vsvg->emit, 
+                              1,
+                              &vsvg->draw->rasterizer->point_size,
+                              0);
 
       vsvg->emit->run( vsvg->emit,
                        0, count,
@@ -293,11 +297,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
    emit.nr_elements = key->nr_outputs;
    emit.output_stride = key->output_stride;
    for (i = 0; i < key->nr_outputs; i++) {
-      emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      emit.element[i].input_buffer = 0;
-      emit.element[i].input_offset = i * 4 * sizeof(float);
-      emit.element[i].output_format = key->element[i].out.format;
-      emit.element[i].output_offset = key->element[i].out.offset;
+      if (key->element[i].out.format != EMIT_1F_PSIZE)
+      {      
+         emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         emit.element[i].input_buffer = 0;
+         emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
+         emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
+         emit.element[i].output_offset = key->element[i].out.offset;
+      }
+      else {
+         emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+         emit.element[i].input_buffer = 1;
+         emit.element[i].input_offset = 0;
+         emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
+         emit.element[i].output_offset = key->element[i].out.offset;
+      }
    }
 
    vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );