svga: implement support for signed byte vertex attributes
authorBrian Paul <brianp@vmware.com>
Thu, 17 Apr 2014 15:54:47 +0000 (08:54 -0700)
committerBrian Paul <brianp@vmware.com>
Thu, 17 Apr 2014 18:29:33 +0000 (11:29 -0700)
There's no SVGA3D_DECLTYPE that directly corresponds to
PIPE_FORMAT_R8G8B8_SNORM.  Previously, we used the swtnl fallback
path to handle this but that's slow and causes invariance issues.
Now we fetch the attribute as SVGA3D_DECLTYPE_UBYTE4N and insert
some extra VS instructions to remap the attributes from the range
[0,1] to the range [-1,1].

Fixes Sauerbraten sw fallback.
Fixes piglit normal3b3s-invariance test.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
src/gallium/drivers/svga/svga_context.h
src/gallium/drivers/svga/svga_pipe_vertex.c
src/gallium/drivers/svga/svga_state_vs.c
src/gallium/drivers/svga/svga_tgsi.h
src/gallium/drivers/svga/svga_tgsi_emit.h
src/gallium/drivers/svga/svga_tgsi_insn.c

index 79cecbf3221079539df1661c79a6d34e5e8c2389..55642773f896c0a9c18fc97b33c1522977d10c8a 100644 (file)
@@ -201,6 +201,7 @@ struct svga_velems_state {
    unsigned count;
    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
    SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */
+   unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */
 };
 
 /* Use to calculate differences between state emitted to hardware and
index e34f3a00eeb95af2bbddd7d57712c71701747ddf..d679ad3bdf3bc577c8593c3ea2577f58b527ad21 100644 (file)
@@ -94,6 +94,9 @@ translate_vertex_format(enum pipe_format format)
    case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
    case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
 
+   /* See attrib_needs_range_adjustment() below */
+   case PIPE_FORMAT_R8G8B8_SNORM:         return SVGA3D_DECLTYPE_UBYTE4N;
+
    default:
       /* There are many formats without hardware support.  This case
        * will be hit regularly, meaning we'll need swvfetch.
@@ -103,6 +106,23 @@ translate_vertex_format(enum pipe_format format)
 }
 
 
+/**
+ * Does the given vertex attrib format need range adjustment in the VS?
+ * Range adjustment remaps fetched values from [0,1] to [-1,1]; currently
+ * only PIPE_FORMAT_R8G8B8_SNORM needs it.  This lets us avoid the swtnl path.
+ */
+static boolean
+attrib_needs_range_adjustment(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
+
+
 static void *
 svga_create_vertex_elements_state(struct pipe_context *pipe,
                                   unsigned count,
@@ -117,9 +137,16 @@ svga_create_vertex_elements_state(struct pipe_context *pipe,
       velems->count = count;
       memcpy(velems->velem, attribs, sizeof(*attribs) * count);
 
+      velems->adjust_attrib_range = 0x0;
+
       /* Translate Gallium vertex format to SVGA3dDeclType */
       for (i = 0; i < count; i++) {
-         velems->decl_type[i] = translate_vertex_format(attribs[i].src_format);
+         enum pipe_format f = attribs[i].src_format;
+         velems->decl_type[i] = translate_vertex_format(f);
+
+         if (attrib_needs_range_adjustment(f)) {
+            velems->adjust_attrib_range |= (1 << i);
+         }
       }
    }
    return velems;
index 2f130aec5b496158bf1ea43dea10538b7abde2a7..2ea25495b202ec8282b58a26efe003fdb110952b 100644 (file)
@@ -159,6 +159,9 @@ make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key)
 
    /* SVGA_NEW_FS */
    key->fs_generic_inputs = svga->curr.fs->generic_inputs;
+
+   /* SVGA_NEW_VELEMENT */
+   key->adjust_attrib_range = svga->curr.velems->adjust_attrib_range;
 }
 
 
@@ -248,6 +251,7 @@ struct svga_tracked_state svga_hw_vs =
    (SVGA_NEW_VS |
     SVGA_NEW_FS |
     SVGA_NEW_PRESCALE |
+    SVGA_NEW_VELEMENT |
     SVGA_NEW_NEED_SWTNL),
    emit_hw_vs
 };
index 4fe88b3b70d95d62983ca4ee8b109190102402e9..cb40560242a4ed586fd7ebc54ccb031129b671c2 100644 (file)
@@ -49,6 +49,7 @@ struct svga_vs_compile_key
    unsigned fs_generic_inputs;
    unsigned need_prescale:1;
    unsigned allow_psiz:1;
+   unsigned adjust_attrib_range:16;
 };
 
 struct svga_fs_compile_key
index 1a9731ffde8690c5c3259e59ab9769cec4c1df3d..1894296e6d7b2e73649b82f787732311c8831345 100644 (file)
@@ -86,7 +86,7 @@ struct svga_shader_emitter
    boolean in_main_func;
 
    boolean created_common_immediate;
-   int common_immediate_idx;
+   int common_immediate_idx[2];
 
    boolean created_loop_const;
    int loop_const_idx;
index d357058240485fbabf566177f9ca2ddfba0ee436..e798b17daa752c146b70ddff688046267a921b3a 100644 (file)
@@ -859,8 +859,20 @@ create_common_immediate( struct svga_shader_emitter *emit )
    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                         idx, 0.0f, 0.5f, -1.0f, 1.0f ))
       return FALSE;
+   emit->common_immediate_idx[0] = idx;
+   idx++;
+
+   /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
+   if (emit->key.vkey.adjust_attrib_range) {
+      if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                           idx, 2.0f, 0.0f, 0.0f, 0.0f ))
+         return FALSE;
+      emit->common_immediate_idx[1] = idx;
+   }
+   else {
+      emit->common_immediate_idx[1] = -1;
+   }
 
-   emit->common_immediate_idx = idx;
    emit->created_common_immediate = TRUE;
 
    return TRUE;
@@ -889,7 +901,7 @@ common_immediate_swizzle(float value)
 
 
 /**
- * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5
+ * Returns an immediate reg where all the terms are either 0, 0.5, -1 or 1
  */
 static struct src_register
 get_immediate(struct svga_shader_emitter *emit,
@@ -900,8 +912,8 @@ get_immediate(struct svga_shader_emitter *emit,
    unsigned sz = common_immediate_swizzle(z);
    unsigned sw = common_immediate_swizzle(w);
    assert(emit->created_common_immediate);
-   assert(emit->common_immediate_idx >= 0);
-   return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+   assert(emit->common_immediate_idx[0] >= 0);
+   return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
                   sx, sy, sz, sw);
 }
 
@@ -913,9 +925,9 @@ static struct src_register
 get_zero_immediate( struct svga_shader_emitter *emit )
 {
    assert(emit->created_common_immediate);
-   assert(emit->common_immediate_idx >= 0);
+   assert(emit->common_immediate_idx[0] >= 0);
    return swizzle(src_register( SVGA3DREG_CONST,
-                                emit->common_immediate_idx),
+                                emit->common_immediate_idx[0]),
                   0, 0, 0, 0);
 }
 
@@ -927,9 +939,9 @@ static struct src_register
 get_one_immediate( struct svga_shader_emitter *emit )
 {
    assert(emit->created_common_immediate);
-   assert(emit->common_immediate_idx >= 0);
+   assert(emit->common_immediate_idx[0] >= 0);
    return swizzle(src_register( SVGA3DREG_CONST,
-                                emit->common_immediate_idx),
+                                emit->common_immediate_idx[0]),
                   3, 3, 3, 3);
 }
 
@@ -941,12 +953,27 @@ static struct src_register
 get_half_immediate( struct svga_shader_emitter *emit )
 {
    assert(emit->created_common_immediate);
-   assert(emit->common_immediate_idx >= 0);
-   return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+   assert(emit->common_immediate_idx[0] >= 0);
+   return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
                   1, 1, 1, 1);
 }
 
 
+/**
+ * returns {2, 2, 2, 2} immediate (X component of the second common immediate)
+ */
+static struct src_register
+get_two_immediate( struct svga_shader_emitter *emit )
+{
+   /* Note: the second common immediate exists only for attrib range adjustment */
+   assert(emit->created_common_immediate);
+   assert(emit->common_immediate_idx[1] >= 0);
+   return swizzle(src_register( SVGA3DREG_CONST,
+                                emit->common_immediate_idx[1]),
+                  0, 0, 0, 0);
+}
+
+
 /**
  * returns the loop const
  */
@@ -3497,6 +3524,74 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit)
 }
 
 
+/**
+ * Emit code to remap vertex attributes from the range [0,1] to [-1,1].
+ * Each attribute flagged in the VS key's adjust_attrib_range bitmask is
+ * recomputed into a new temp register which then replaces its input_map entry.
+ */
+static boolean
+emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
+{
+   unsigned adjust_attrib_range = emit->key.vkey.adjust_attrib_range;
+
+   while (adjust_attrib_range) {
+      /* The vertex input/attribute is supposed to be a signed value in
+       * the range [-1,1] but we actually fetched/converted it to the
+       * range [0,1].  This most likely happens when the app specifies a
+       * signed byte attribute but we interpreted it as unsigned bytes.
+       * See also translate_vertex_format().
+       *
+       * Here, we emit some extra instructions to adjust
+       * the attribute values from [0,1] to [-1,1].
+       *
+       * The adjustment we implement is:
+       *   new_attrib = attrib * 2.0;
+       *   if (attrib >= 0.5)
+       *      new_attrib = new_attrib - 2.0;
+       * This isn't exactly right (it's off by a bit or so) but close enough.
+       */
+      const unsigned index = u_bit_scan(&adjust_attrib_range);
+      struct src_register tmp;
+
+      SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
+
+      /* allocate a temp reg */
+      tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
+      emit->nr_hw_temp++;
+
+      /* tmp = attrib * 2.0 */
+      if (!submit_op2(emit,
+                      inst_token(SVGA3DOP_MUL),
+                      dst(tmp),
+                      emit->input_map[index],
+                      get_two_immediate(emit)))
+         return FALSE;
+
+      /* pred = (attrib >= 0.5) */
+      if (!submit_op2(emit,
+                      inst_token_setp(SVGA3DOPCOMP_GE),
+                      pred_reg,
+                      emit->input_map[index],  /* vert attrib */
+                      get_half_immediate(emit)))  /* 0.5 */
+         return FALSE;
+
+      /* sub(pred) tmp, tmp, 2.0 */
+      if (!submit_op3(emit,
+                      inst_token_predicated(SVGA3DOP_SUB),
+                      dst(tmp),
+                      src(pred_reg),
+                      tmp,
+                      get_two_immediate(emit)))
+         return FALSE;
+
+      /* Reassign the input_map entry to the new tmp register */
+      emit->input_map[index] = tmp;
+   }
+
+   return TRUE;
+}
+
+
 /**
  * Determine if we need to create the "common" immediate value which is
  * used for generating useful vector constants such as {0,0,0,0} and
@@ -3542,10 +3637,11 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
             return TRUE;
       }
    }
-
-   if (emit->unit == PIPE_SHADER_VERTEX) {
+   else if (emit->unit == PIPE_SHADER_VERTEX) {
       if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
          return TRUE;
+      if (emit->key.vkey.adjust_attrib_range)
+         return TRUE;
    }
 
    if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
@@ -3705,6 +3801,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit)
             return FALSE;
       }
    }
+   else {
+      assert(emit->unit == PIPE_SHADER_VERTEX);
+      if (emit->key.vkey.adjust_attrib_range) {
+         if (!emit_adjusted_vertex_attribs(emit))
+            return FALSE;
+      }
+   }
+
 
    return TRUE;
 }