gallium: add PIPE_CAP_MAX_VARYINGS

[mesa.git] / src / gallium / drivers / svga / svga_shader.c
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c

index 6b6b441cb828e37ec06321146ff61bdc0918e1fc..22e449835d54c773e8a60fc85ee75a8dec33d7a9 100644 (file)
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -25,16 +25,434 @@
  
  #include "util/u_bitmask.h"
  #include "util/u_memory.h"
+#include "util/u_format.h"
  #include "svga_context.h"
  #include "svga_cmd.h"
+#include "svga_format.h"
  #include "svga_shader.h"
+#include "svga_resource_texture.h"
  
  
+/**
+ * This bit isn't really used anywhere.  It only serves to help
+ * generate a unique "signature" for the vertex shader output bitmask.
+ * Shader input/output signatures are used to resolve shader linking
+ * issues.
+ */
+#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)
+
+
+/**
+ * Use the shader info to generate a bitmask indicating which generic
+ * inputs are used by the shader.  A set bit indicates that GENERIC[i]
+ * is used.
+ */
+uint64_t
+svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_inputs; i++) {
+      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+         unsigned j = info->input_semantic_index[i];
+         assert(j < sizeof(mask) * 8);
+         mask |= ((uint64_t) 1) << j;
+      }
+   }
+
+   return mask;
+}
+
+
+/**
+ * Scan shader info to return a bitmask of written outputs.
+ */
+uint64_t
+svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
+{
+   unsigned i;
+   uint64_t mask = 0x0;
+
+   for (i = 0; i < info->num_outputs; i++) {
+      switch (info->output_semantic_name[i]) {
+      case TGSI_SEMANTIC_GENERIC:
+         {
+            unsigned j = info->output_semantic_index[i];
+            assert(j < sizeof(mask) * 8);
+            mask |= ((uint64_t) 1) << j;
+         }
+         break;
+      case TGSI_SEMANTIC_FOG:
+         mask |= FOG_GENERIC_BIT;
+         break;
+      }
+   }
+
+   return mask;
+}
+
+
+
+/**
+ * Given a mask of used generic variables (as returned by the above functions)
+ * fill in a table which maps those indexes to small integers.
+ * This table is used by the remap_generic_index() function in
+ * svga_tgsi_decl_sm30.c
+ * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
+ * GENERIC[3] are used.  The remap_table will contain:
+ *   table[1] = 0;
+ *   table[3] = 1;
+ * The remaining table entries will be filled in with the next unused
+ * generic index (in this example, 2).
+ */
+void
+svga_remap_generics(uint64_t generics_mask,
+                    int8_t remap_table[MAX_GENERIC_VARYING])
+{
+   /* Note texcoord[0] is reserved so start at 1 */
+   unsigned count = 1, i;
+
+   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+      remap_table[i] = -1;
+   }
+
+   /* for each bit set in generic_mask */
+   while (generics_mask) {
+      unsigned index = ffsll(generics_mask) - 1;
+      remap_table[index] = count++;
+      generics_mask &= ~((uint64_t) 1 << index);
+   }
+}
+
+
+/**
+ * Use the generic remap table to map a TGSI generic varying variable
+ * index to a small integer.  If the remapping table doesn't have a
+ * valid value for the given index (the table entry is -1) it means
+ * the fragment shader doesn't use that VS output.  Just allocate
+ * the next free value in that case.  Alternately, we could cull
+ * VS instructions that write to register, or replace the register
+ * with a dummy temp register.
+ * XXX TODO: we should do one of the later as it would save precious
+ * texcoord registers.
+ */
+int
+svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
+                         int generic_index)
+{
+   assert(generic_index < MAX_GENERIC_VARYING);
+
+   if (generic_index >= MAX_GENERIC_VARYING) {
+      /* just don't return a random/garbage value */
+      generic_index = MAX_GENERIC_VARYING - 1;
+   }
+
+   if (remap_table[generic_index] == -1) {
+      /* This is a VS output that has no matching PS input.  Find a
+       * free index.
+       */
+      int i, max = 0;
+      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
+         max = MAX2(max, remap_table[i]);
+      }
+      remap_table[generic_index] = max + 1;
+   }
+
+   return remap_table[generic_index];
+}
+
+static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_Z,
+   PIPE_SWIZZLE_W,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_Z,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+
+/**
+ * Initialize the shader-neutral fields of svga_compile_key from context
+ * state.  This is basically the texture-related state.
+ */
+void
+svga_init_shader_key_common(const struct svga_context *svga,
+                            enum pipe_shader_type shader,
+                            struct svga_compile_key *key)
+{
+   unsigned i, idx = 0;
+
+   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
+
+   /* In case the number of samplers and sampler_views doesn't match,
+    * loop over the lower of the two counts.
+    */
+   key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
+                            svga->curr.num_samplers[shader]);
+
+   for (i = 0; i < key->num_textures; i++) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
+      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      if (view) {
+         assert(view->texture);
+         assert(view->texture->target < (1 << 4)); /* texture_target:4 */
+
+         /* 1D/2D array textures with one slice and cube map array textures
+          * with one cube are treated as non-arrays by the SVGA3D device.
+          * Set the is_array flag only if we know that we have more than 1
+          * element.  This will be used to select shader instruction/resource
+          * types during shader translation.
+          */
+         switch (view->texture->target) {
+         case PIPE_TEXTURE_1D_ARRAY:
+         case PIPE_TEXTURE_2D_ARRAY:
+            key->tex[i].is_array = view->texture->array_size > 1;
+            break;
+         case PIPE_TEXTURE_CUBE_ARRAY:
+            key->tex[i].is_array = view->texture->array_size > 6;
+            break;
+         default:
+            ; /* nothing / silence compiler warning */
+         }
+
+         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
+         key->tex[i].num_samples = view->texture->nr_samples;
+
+         const enum pipe_swizzle *swizzle_tab;
+         if (view->texture->target == PIPE_BUFFER) {
+            SVGA3dSurfaceFormat svga_format;
+            unsigned tf_flags;
+
+            /* Apply any special swizzle mask for the view format if needed */
+
+            svga_translate_texture_buffer_view_format(view->format,
+                                                      &svga_format, &tf_flags);
+            if (tf_flags & TF_000X)
+               swizzle_tab = set_000X;
+            else if (tf_flags & TF_XXXX)
+               swizzle_tab = set_XXXX;
+            else if (tf_flags & TF_XXX1)
+               swizzle_tab = set_XXX1;
+            else if (tf_flags & TF_XXXY)
+               swizzle_tab = set_XXXY;
+            else
+               swizzle_tab = copy_alpha;
+         }
+         else {
+            /* If we have a non-alpha view into an svga3d surface with an
+             * alpha channel, then explicitly set the alpha channel to 1
+             * when sampling. Note that we need to check the
+             * actual device format to cover also imported surface cases.
+             */
+            swizzle_tab =
+               (!util_format_has_alpha(view->format) &&
+                svga_texture_device_format_has_alpha(view->texture)) ?
+                set_alpha : copy_alpha;
+
+            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
+                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
+               swizzle_tab = set_alpha;
+         }
+
+         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
+         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
+         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
+         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
+      }
+
+      if (sampler) {
+         if (!sampler->normalized_coords) {
+            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+
+            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
+                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
+                key->tex[i].texel_bias = TRUE;
+            }
+         }
+      }
+   }
+}
+
+
+/** Search for a compiled shader variant with the same compile key */
+struct svga_shader_variant *
+svga_search_shader_key(const struct svga_shader *shader,
+                       const struct svga_compile_key *key)
+{
+   struct svga_shader_variant *variant = shader->variants;
+
+   assert(key);
+
+   for ( ; variant; variant = variant->next) {
+      if (svga_compile_keys_equal(key, &variant->key))
+         return variant;
+   }
+   return NULL;
+}
+
+/** Search for a shader with the same token key */
+struct svga_shader *
+svga_search_shader_token_key(struct svga_shader *pshader,
+                             const struct svga_token_key *key)
+{
+   struct svga_shader *shader = pshader;
+
+   assert(key);
+
+   for ( ; shader; shader = shader->next) {
+      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
+         return shader;
+   }
+   return NULL;
+}
+
+/**
+ * Helper function to define a gb shader for non-vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu9(struct svga_context *svga,
+                       SVGA3dShaderType type,
+                       struct svga_shader_variant *variant,
+                       unsigned codeLen)
+{
+   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   enum pipe_error ret;
+
+   /**
+    * Create gb memory for the shader and upload the shader code.
+    * Kernel module will allocate an id for the shader and issue
+    * the DefineGBShader command.
+    */
+   variant->gb_shader = sws->shader_create(sws, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
+
+   return ret;
+}
+
+/**
+ * Helper function to define a gb shader for vgpu10 device
+ */
+static enum pipe_error
+define_gb_shader_vgpu10(struct svga_context *svga,
+                        SVGA3dShaderType type,
+                        struct svga_shader_variant *variant,
+                        unsigned codeLen)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   enum pipe_error ret;
+
+   /**
+    * Shaders in VGPU10 enabled device reside in the device COTable.
+    * SVGA driver will allocate an integer ID for the shader and
+    * issue DXDefineShader and DXBindShader commands.
+    */
+   variant->id = util_bitmask_add(svga->shader_id_bm);
+   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Create gb memory for the shader and upload the shader code */
+   variant->gb_shader = swc->shader_create(swc,
+                                           variant->id, type,
+                                           variant->tokens, codeLen);
+
+   if (!variant->gb_shader) {
+      /* Free the shader ID */
+      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
+      goto fail_no_allocation;
+   }
+
+   /**
+    * Since we don't want to do any flush within state emission to avoid
+    * partial state in a command buffer, it's important to make sure that
+    * there is enough room to send both the DXDefineShader & DXBindShader
+    * commands in the same command buffer. So let's send both
+    * commands in one command reservation. If it fails, we'll undo
+    * the shader creation and return an error.
+    */
+   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
+                                           variant->id, type, codeLen);
+
+   if (ret != PIPE_OK)
+      goto fail;
+
+   return PIPE_OK;
+
+fail:
+   swc->shader_destroy(swc, variant->gb_shader);
+   variant->gb_shader = NULL;
+
+fail_no_allocation:
+   util_bitmask_clear(svga->shader_id_bm, variant->id);
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
  
  /**
   * Issue the SVGA3D commands to define a new shader.
- * \param result  contains the shader tokens, etc.  The result->id field will
- *                be set here.
+ * \param variant  contains the shader tokens, etc.  The result->id field will
+ *                 be set here.
   */
  enum pipe_error
  svga_define_shader(struct svga_context *svga,
@@ -42,24 +460,24 @@ svga_define_shader(struct svga_context *svga,
                     struct svga_shader_variant *variant)
  {
     unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
+   enum pipe_error ret;
  
-   if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
  
-      variant->gb_shader = sws->shader_create(sws, type,
-                                              variant->tokens, codeLen);
-      if (!variant->gb_shader)
-         return PIPE_ERROR_OUT_OF_MEMORY;
+   variant->id = UTIL_BITMASK_INVALID_INDEX;
  
-      return PIPE_OK;
+   if (svga_have_gb_objects(svga)) {
+      if (svga_have_vgpu10(svga))
+         ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
+      else
+         ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
     }
     else {
-      enum pipe_error ret;
-
        /* Allocate an integer ID for the shader */
        variant->id = util_bitmask_add(svga->shader_id_bm);
        if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto done;
        }
  
        /* Issue SVGA3D device command to define the shader */
@@ -73,48 +491,150 @@ svga_define_shader(struct svga_context *svga,
           assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
           util_bitmask_clear(svga->shader_id_bm, variant->id);
           variant->id = UTIL_BITMASK_INVALID_INDEX;
-         return ret;
        }
     }
  
-   return PIPE_OK;
+done:
+   SVGA_STATS_TIME_POP(svga_sws(svga));
+   return ret;
  }
  
  
-
+/**
+ * Issue the SVGA3D commands to set/bind a shader.
+ * \param result  the shader to bind.
+ */
  enum pipe_error
-svga_destroy_shader_variant(struct svga_context *svga,
-                            SVGA3dShaderType type,
-                            struct svga_shader_variant *variant)
+svga_set_shader(struct svga_context *svga,
+                SVGA3dShaderType type,
+                struct svga_shader_variant *variant)
  {
-   enum pipe_error ret = PIPE_OK;
+   enum pipe_error ret;
+   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
+
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_GS ||
+          type == SVGA3D_SHADERTYPE_PS);
  
     if (svga_have_gb_objects(svga)) {
-      struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+      struct svga_winsys_gb_shader *gbshader =
+         variant ? variant->gb_shader : NULL;
  
-      sws->shader_destroy(sws, variant->gb_shader);
-      variant->gb_shader = NULL;
-      goto end;
+      if (svga_have_vgpu10(svga))
+         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
+      else
+         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
+   }
+   else {
+      ret = SVGA3D_SetShader(svga->swc, type, id);
     }
  
-   /* first try */
-   if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+   return ret;
+}
  
-      if (ret != PIPE_OK) {
-         /* flush and try again */
-         svga_context_flush(svga, NULL);
  
+struct svga_shader_variant *
+svga_new_shader_variant(struct svga_context *svga)
+{
+   svga->hud.num_shaders++;
+   return CALLOC_STRUCT(svga_shader_variant);
+}
+
+
+void
+svga_destroy_shader_variant(struct svga_context *svga,
+                            SVGA3dShaderType type,
+                            struct svga_shader_variant *variant)
+{
+   enum pipe_error ret = PIPE_OK;
+
+   if (svga_have_gb_objects(svga) && variant->gb_shader) {
+      if (svga_have_vgpu10(svga)) {
+         struct svga_winsys_context *swc = svga->swc;
+         swc->shader_destroy(swc, variant->gb_shader);
+         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
+      }
+      else {
+         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+         sws->shader_destroy(sws, variant->gb_shader);
+      }
+      variant->gb_shader = NULL;
+   }
+   else {
+      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
           ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         assert(ret == PIPE_OK);
+         if (ret != PIPE_OK) {
+            /* flush and try again */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
+            assert(ret == PIPE_OK);
+         }
+         util_bitmask_clear(svga->shader_id_bm, variant->id);
        }
-
-      util_bitmask_clear(svga->shader_id_bm, variant->id);
     }
  
-end:
     FREE((unsigned *)variant->tokens);
     FREE(variant);
  
-   return ret;
+   svga->hud.num_shaders--;
+}
+
+/*
+ * Rebind shaders.
+ * Called at the beginning of every new command buffer to ensure that
+ * shaders are properly paged-in. Instead of sending the SetShader
+ * command, this function sends a private allocation command to
+ * page in a shader. This avoids emitting redundant state to the device
+ * just to page in a resource.
+ */
+enum pipe_error
+svga_rebind_shaders(struct svga_context *svga)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
+   enum pipe_error ret;
+
+   assert(svga_have_vgpu10(svga));
+
+   /**
+    * If the underlying winsys layer does not need resource rebinding,
+    * just clear the rebind flags and return.
+    */
+   if (swc->resource_rebind == NULL) {
+      svga->rebind.flags.vs = 0;
+      svga->rebind.flags.gs = 0;
+      svga->rebind.flags.fs = 0;
+
+      return PIPE_OK;
+   }
+
+   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.vs = 0;
+
+   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.gs = 0;
+
+   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.fs = 0;
+
+   return PIPE_OK;
  }