#include "util/u_bitmask.h"
#include "util/u_memory.h"
+#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
+#include "svga_resource_texture.h"
/**
return remap_table[generic_index];
}
+/* Swizzle remap tables, indexed by a sampler view's per-channel
+ * pipe_swizzle value.  They let the shader compile key apply a
+ * format-dependent swizzle override on top of the view's own swizzle.
+ */
+
+/* Identity: pass the view's swizzle through unchanged. */
+static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z,
+ PIPE_SWIZZLE_W,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
+/* Force alpha to 1 (W -> 1); RGB channels pass through unchanged. */
+static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
+/* Produce (0, 0, 0, X): only the X channel is read, into alpha. */
+static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
+/* Produce (X, X, X, X): replicate X into all four channels. */
+static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
+/* Produce (X, X, X, 1): replicate X into RGB, force alpha to 1. */
+static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
+/* Produce (X, X, X, Y): replicate X into RGB, take alpha from Y. */
+static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1,
+ PIPE_SWIZZLE_NONE
+};
+
/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state. This is basically the texture-related state.
 */
void
-svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+svga_init_shader_key_common(const struct svga_context *svga,
+ enum pipe_shader_type shader_type,
+ const struct svga_shader *shader,
struct svga_compile_key *key)
{
unsigned i, idx = 0;
- assert(shader < Elements(svga->curr.num_sampler_views));
+ assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
+
+ /* The number of samplers and sampler_views may not match; key off the
+ * larger of the two counts (MAX2).  NULL view/sampler entries in the
+ * uncovered range are skipped by the checks below.
+ */
+ key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
+ svga->curr.num_samplers[shader_type]);
+
+ for (i = 0; i < key->num_textures; i++) {
+ struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
+ const struct svga_sampler_state
+ *sampler = svga->curr.sampler[shader_type][i];
- for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
- struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
if (view) {
- assert(svga->curr.sampler[shader][i]);
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
- /* 1D/2D array textures with one slice are treated as non-arrays
- * by the SVGA3D device. Convert the texture type here so that
- * we emit the right TEX/SAMPLE instruction in the shader.
+ /* 1D/2D array textures with one slice and cube map array textures
+ * with one cube are treated as non-arrays by the SVGA3D device.
+ * Set the is_array flag only if we know that we have more than 1
+ * element. This will be used to select shader instruction/resource
+ * types during shader translation.
*/
- if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
- view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
- if (view->texture->array_size == 1) {
- key->tex[i].is_array = 0;
- }
- else {
- assert(view->texture->array_size > 1);
- key->tex[i].is_array = 1;
- }
+ switch (view->texture->target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ case PIPE_TEXTURE_2D_ARRAY:
+ key->tex[i].is_array = view->texture->array_size > 1;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ /* a cube array holds 6 layers per cube, so >6 means >1 cube */
+ key->tex[i].is_array = view->texture->array_size > 6;
+ break;
+ default:
+ ; /* nothing / silence compiler warning */
}
- key->tex[i].texture_msaa = view->texture->nr_samples > 1;
- if (!svga->curr.sampler[shader][i]->normalized_coords) {
- assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
- key->tex[i].width_height_idx = idx++;
+ assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
+ key->tex[i].num_samples = view->texture->nr_samples;
+
+ /* Pick the swizzle override table to compose with the view's
+ * per-channel swizzles below.
+ */
+ const enum pipe_swizzle *swizzle_tab;
+ if (view->texture->target == PIPE_BUFFER) {
+ SVGA3dSurfaceFormat svga_format;
+ unsigned tf_flags;
+
+ /* Apply any special swizzle mask for the view format if needed */
+
+ svga_translate_texture_buffer_view_format(view->format,
+ &svga_format, &tf_flags);
+ if (tf_flags & TF_000X)
+ swizzle_tab = set_000X;
+ else if (tf_flags & TF_XXXX)
+ swizzle_tab = set_XXXX;
+ else if (tf_flags & TF_XXX1)
+ swizzle_tab = set_XXX1;
+ else if (tf_flags & TF_XXXY)
+ swizzle_tab = set_XXXY;
+ else
+ swizzle_tab = copy_alpha;
+ }
+ else {
+ /* If we have a non-alpha view into an svga3d surface with an
+ * alpha channel, then explicitly set the alpha channel to 1
+ * when sampling. Note that we need to check the
+ * actual device format to cover also imported surface cases.
+ */
+ swizzle_tab =
+ (!util_format_has_alpha(view->format) &&
+ svga_texture_device_format_has_alpha(view->texture)) ?
+ set_alpha : copy_alpha;
+
+ /* DXT1 RGB formats have no alpha; always force alpha to 1. */
+ if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
+ view->texture->format == PIPE_FORMAT_DXT1_SRGB)
+ swizzle_tab = set_alpha;
+
+ /* Save the compare function as we need to handle
+ * depth compare in the shader.
+ */
+ key->tex[i].compare_mode = sampler->compare_mode;
+ key->tex[i].compare_func = sampler->compare_func;
+ }
+
+ key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
+ key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
+ key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
+ key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
+ }
+
+ /* NOTE(review): sampler may be non-NULL while view is NULL since the
+ * two counts can differ; unnormalized state is still recorded then.
+ */
+ if (sampler) {
+ if (!sampler->normalized_coords) {
+ if (view) {
+ assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
+ key->tex[i].width_height_idx = idx++;
+ }
key->tex[i].unnormalized = TRUE;
++key->num_unnormalized_coords;
- }
- key->tex[i].swizzle_r = view->swizzle_r;
- key->tex[i].swizzle_g = view->swizzle_g;
- key->tex[i].swizzle_b = view->swizzle_b;
- key->tex[i].swizzle_a = view->swizzle_a;
+ /* Nearest min/mag filtering with unnormalized coords requests a
+ * texel bias in the shader (presumably to compensate for
+ * coordinate rounding — TODO confirm in shader translation).
+ */
+ if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
+ sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
+ key->tex[i].texel_bias = TRUE;
+ }
+ }
}
}
- key->num_textures = svga->curr.num_sampler_views[shader];
+
+ /* Clamp vertex colors only when the bound rasterizer state asks for it. */
+ key->clamp_vertex_color = svga->curr.rast ?
+ svga->curr.rast->templ.clamp_vertex_color : 0;
}
*/
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
- SVGA3dShaderType type,
struct svga_shader_variant *variant,
unsigned codeLen)
{
* Kernel module will allocate an id for the shader and issue
* the DefineGBShader command.
*/
- variant->gb_shader = sws->shader_create(sws, type,
+ variant->gb_shader = sws->shader_create(sws, variant->type,
variant->tokens, codeLen);
+ svga->hud.shader_mem_used += codeLen;
+
if (!variant->gb_shader)
return PIPE_ERROR_OUT_OF_MEMORY;
*/
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
- SVGA3dShaderType type,
struct svga_shader_variant *variant,
unsigned codeLen)
{
struct svga_winsys_context *swc = svga->swc;
enum pipe_error ret;
+ unsigned len = codeLen + variant->signatureLen;
/**
* Shaders in VGPU10 enabled device reside in the device COTable.
/* Create gb memory for the shader and upload the shader code */
variant->gb_shader = swc->shader_create(swc,
- variant->id, type,
- variant->tokens, codeLen);
+ variant->id, variant->type,
+ variant->tokens, codeLen,
+ variant->signature,
+ variant->signatureLen);
+
+ svga->hud.shader_mem_used += len;
if (!variant->gb_shader) {
/* Free the shader ID */
* the shader creation and return an error.
*/
ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
- variant->id, type, codeLen);
+ variant->id, variant->type,
+ len);
if (ret != PIPE_OK)
goto fail;
*/
enum pipe_error
svga_define_shader(struct svga_context *svga,
- SVGA3dShaderType type,
struct svga_shader_variant *variant)
{
unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
enum pipe_error ret;
+ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
+
variant->id = UTIL_BITMASK_INVALID_INDEX;
if (svga_have_gb_objects(svga)) {
if (svga_have_vgpu10(svga))
- return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+ ret = define_gb_shader_vgpu10(svga, variant, codeLen);
else
- return define_gb_shader_vgpu9(svga, type, variant, codeLen);
+ ret = define_gb_shader_vgpu9(svga, variant, codeLen);
}
else {
/* Allocate an integer ID for the shader */
variant->id = util_bitmask_add(svga->shader_id_bm);
if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
- return PIPE_ERROR_OUT_OF_MEMORY;
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto done;
}
/* Issue SVGA3D device command to define the shader */
ret = SVGA3D_DefineShader(svga->swc,
variant->id,
- type,
+ variant->type,
variant->tokens,
codeLen);
if (ret != PIPE_OK) {
}
}
+done:
+ SVGA_STATS_TIME_POP(svga_sws(svga));
return ret;
}
assert(type == SVGA3D_SHADERTYPE_VS ||
type == SVGA3D_SHADERTYPE_GS ||
- type == SVGA3D_SHADERTYPE_PS);
+ type == SVGA3D_SHADERTYPE_PS ||
+ type == SVGA3D_SHADERTYPE_HS ||
+ type == SVGA3D_SHADERTYPE_DS ||
+ type == SVGA3D_SHADERTYPE_CS);
if (svga_have_gb_objects(svga)) {
struct svga_winsys_gb_shader *gbshader =
+/**
+ * Allocate a stage-specific shader variant structure and record the
+ * corresponding SVGA3D shader type in it.  Returns NULL on unsupported
+ * stages or allocation failure.
+ *
+ * NOTE(review): PIPE_SHADER_COMPUTE lands in the default case and returns
+ * NULL — confirm compute variants are allocated elsewhere.
+ */
struct svga_shader_variant *
-svga_new_shader_variant(struct svga_context *svga)
+svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
- svga->hud.num_shaders++;
- return CALLOC_STRUCT(svga_shader_variant);
+ struct svga_shader_variant *variant;
+
+ /* Each stage has its own variant struct size; allocate accordingly. */
+ switch (type) {
+ case PIPE_SHADER_FRAGMENT:
+ variant = CALLOC(1, sizeof(struct svga_fs_variant));
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ variant = CALLOC(1, sizeof(struct svga_gs_variant));
+ break;
+ case PIPE_SHADER_VERTEX:
+ variant = CALLOC(1, sizeof(struct svga_vs_variant));
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ variant = CALLOC(1, sizeof(struct svga_tes_variant));
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ variant = CALLOC(1, sizeof(struct svga_tcs_variant));
+ break;
+ default:
+ /* unsupported shader stage */
+ return NULL;
+ }
+
+ if (variant) {
+ variant->type = svga_shader_type(type);
+ svga->hud.num_shaders++; /* HUD statistic: live shader count */
+ }
+ return variant;
}
-enum pipe_error
+void
svga_destroy_shader_variant(struct svga_context *svga,
- SVGA3dShaderType type,
struct svga_shader_variant *variant)
{
- enum pipe_error ret = PIPE_OK;
-
if (svga_have_gb_objects(svga) && variant->gb_shader) {
if (svga_have_vgpu10(svga)) {
struct svga_winsys_context *swc = svga->swc;
swc->shader_destroy(swc, variant->gb_shader);
- ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
util_bitmask_clear(svga->shader_id_bm, variant->id);
}
else {
}
else {
if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
- ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
- ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
+ variant->type));
util_bitmask_clear(svga->shader_id_bm, variant->id);
}
}
+ FREE(variant->signature);
FREE((unsigned *)variant->tokens);
FREE(variant);
svga->hud.num_shaders--;
-
- return ret;
}
/*
svga->rebind.flags.vs = 0;
svga->rebind.flags.gs = 0;
svga->rebind.flags.fs = 0;
+ svga->rebind.flags.tcs = 0;
+ svga->rebind.flags.tes = 0;
return PIPE_OK;
}
}
svga->rebind.flags.fs = 0;
+ if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
+ ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ svga->rebind.flags.tcs = 0;
+
+ if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
+ ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ svga->rebind.flags.tes = 0;
+
return PIPE_OK;
}