svga: Performance fixes
[mesa.git] / src / gallium / drivers / svga / svga_shader.c
index ebf1131d51d1e8238578dab51b92fdec40c7db73..52f1153fd61082590cda36ca2e8965adcbd2914b 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "util/u_bitmask.h"
 #include "util/u_memory.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
 #include "svga_format.h"
@@ -182,42 +182,90 @@ static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
    PIPE_SWIZZLE_NONE
 };
 
+static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+
 /**
  * Initialize the shader-neutral fields of svga_compile_key from context
  * state.  This is basically the texture-related state.
  */
 void
 svga_init_shader_key_common(const struct svga_context *svga,
-                            enum pipe_shader_type shader,
+                            enum pipe_shader_type shader_type,
+                            const struct svga_shader *shader,
                             struct svga_compile_key *key)
 {
    unsigned i, idx = 0;
 
-   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
+   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
 
    /* In case the number of samplers and sampler_views doesn't match,
     * loop over the higher of the two counts.
     */
-   key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
-                            svga->curr.num_samplers[shader]);
+   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
+                            svga->curr.num_samplers[shader_type]);
 
    for (i = 0; i < key->num_textures; i++) {
-      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
-      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
+      const struct svga_sampler_state
+         *sampler = svga->curr.sampler[shader_type][i];
+
       if (view) {
          assert(view->texture);
          assert(view->texture->target < (1 << 4)); /* texture_target:4 */
 
-         /* 1D/2D array textures with one slice are treated as non-arrays
-          * by the SVGA3D device.  Set the is_array flag only if we know that
-          * we have more than 1 element.  This will be used to select shader
-          * instruction/resource types during shader translation.
+         /* 1D/2D array textures with one slice and cube map array textures
+          * with one cube are treated as non-arrays by the SVGA3D device.
+          * Set the is_array flag only if we know that we have more than 1
+          * element.  This will be used to select shader instruction/resource
+          * types during shader translation.
           */
          switch (view->texture->target) {
          case PIPE_TEXTURE_1D_ARRAY:
          case PIPE_TEXTURE_2D_ARRAY:
             key->tex[i].is_array = view->texture->array_size > 1;
             break;
+         case PIPE_TEXTURE_CUBE_ARRAY:
+            key->tex[i].is_array = view->texture->array_size > 6;
+            break;
          default:
             ; /* nothing / silence compiler warning */
          }
@@ -225,20 +273,47 @@ svga_init_shader_key_common(const struct svga_context *svga,
          assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
          key->tex[i].num_samples = view->texture->nr_samples;
 
-         /* If we have a non-alpha view into an svga3d surface with an
-          * alpha channel, then explicitly set the alpha channel to 1
-          * when sampling. Note that we need to check the
-          * actual device format to cover also imported surface cases.
-          */
-         const enum pipe_swizzle *swizzle_tab =
-            (view->texture->target != PIPE_BUFFER &&
-             !util_format_has_alpha(view->format) &&
-             svga_texture_device_format_has_alpha(view->texture)) ?
-            set_alpha : copy_alpha;
-
-         if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
-             view->texture->format == PIPE_FORMAT_DXT1_SRGB)
-            swizzle_tab = set_alpha;
+         const enum pipe_swizzle *swizzle_tab;
+         if (view->texture->target == PIPE_BUFFER) {
+            SVGA3dSurfaceFormat svga_format;
+            unsigned tf_flags;
+
+            /* Apply any special swizzle mask for the view format if needed */
+
+            svga_translate_texture_buffer_view_format(view->format,
+                                                      &svga_format, &tf_flags);
+            if (tf_flags & TF_000X)
+               swizzle_tab = set_000X;
+            else if (tf_flags & TF_XXXX)
+               swizzle_tab = set_XXXX;
+            else if (tf_flags & TF_XXX1)
+               swizzle_tab = set_XXX1;
+            else if (tf_flags & TF_XXXY)
+               swizzle_tab = set_XXXY;
+            else
+               swizzle_tab = copy_alpha;
+         }
+         else {
+            /* If we have a non-alpha view into an svga3d surface with an
+             * alpha channel, then explicitly set the alpha channel to 1
+             * when sampling. Note that we need to check the
+             * actual device format to cover also imported surface cases.
+             */
+            swizzle_tab =
+               (!util_format_has_alpha(view->format) &&
+                svga_texture_device_format_has_alpha(view->texture)) ?
+                set_alpha : copy_alpha;
+
+            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
+                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
+               swizzle_tab = set_alpha;
+
+            /* Save the compare function as we need to handle
+             * depth compare in the shader.
+             */
+            key->tex[i].compare_mode = sampler->compare_mode;
+            key->tex[i].compare_func = sampler->compare_func;
+         }
 
          key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
          key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
@@ -248,8 +323,10 @@ svga_init_shader_key_common(const struct svga_context *svga,
 
       if (sampler) {
          if (!sampler->normalized_coords) {
-            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
-            key->tex[i].width_height_idx = idx++;
+            if (view) {
+               assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+               key->tex[i].width_height_idx = idx++;
+           }
             key->tex[i].unnormalized = TRUE;
             ++key->num_unnormalized_coords;
 
@@ -260,6 +337,9 @@ svga_init_shader_key_common(const struct svga_context *svga,
          }
       }
    }
+
+   key->clamp_vertex_color = svga->curr.rast ?
+                             svga->curr.rast->templ.clamp_vertex_color : 0;
 }
 
 
@@ -300,7 +380,6 @@ svga_search_shader_token_key(struct svga_shader *pshader,
  */
 static enum pipe_error
 define_gb_shader_vgpu9(struct svga_context *svga,
-                       SVGA3dShaderType type,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
 {
@@ -312,9 +391,11 @@ define_gb_shader_vgpu9(struct svga_context *svga,
     * Kernel module will allocate an id for the shader and issue
     * the DefineGBShader command.
     */
-   variant->gb_shader = sws->shader_create(sws, type,
+   variant->gb_shader = sws->shader_create(sws, variant->type,
                                            variant->tokens, codeLen);
 
+   svga->hud.shader_mem_used += codeLen;
+
    if (!variant->gb_shader)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -328,12 +409,12 @@ define_gb_shader_vgpu9(struct svga_context *svga,
  */
 static enum pipe_error
 define_gb_shader_vgpu10(struct svga_context *svga,
-                        SVGA3dShaderType type,
                         struct svga_shader_variant *variant,
                         unsigned codeLen)
 {
    struct svga_winsys_context *swc = svga->swc;
    enum pipe_error ret;
+   unsigned len = codeLen + variant->signatureLen;
 
    /**
     * Shaders in VGPU10 enabled device reside in the device COTable.
@@ -347,8 +428,12 @@ define_gb_shader_vgpu10(struct svga_context *svga,
 
    /* Create gb memory for the shader and upload the shader code */
    variant->gb_shader = swc->shader_create(swc,
-                                           variant->id, type,
-                                           variant->tokens, codeLen);
+                                           variant->id, variant->type,
+                                           variant->tokens, codeLen,
+                                           variant->signature,
+                                           variant->signatureLen);
+
+   svga->hud.shader_mem_used += len;
 
    if (!variant->gb_shader) {
       /* Free the shader ID */
@@ -365,7 +450,8 @@ define_gb_shader_vgpu10(struct svga_context *svga,
     * the shader creation and return an error.
     */
    ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
-                                           variant->id, type, codeLen);
+                                           variant->id, variant->type,
+                                           len);
 
    if (ret != PIPE_OK)
       goto fail;
@@ -390,7 +476,6 @@ fail_no_allocation:
  */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
-                   SVGA3dShaderType type,
                    struct svga_shader_variant *variant)
 {
    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
@@ -402,9 +487,9 @@ svga_define_shader(struct svga_context *svga,
 
    if (svga_have_gb_objects(svga)) {
       if (svga_have_vgpu10(svga))
-         ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
+         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
       else
-         ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
+         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
    }
    else {
       /* Allocate an integer ID for the shader */
@@ -417,7 +502,7 @@ svga_define_shader(struct svga_context *svga,
       /* Issue SVGA3D device command to define the shader */
       ret = SVGA3D_DefineShader(svga->swc,
                                 variant->id,
-                                type,
+                                variant->type,
                                 variant->tokens,
                                 codeLen);
       if (ret != PIPE_OK) {
@@ -448,7 +533,10 @@ svga_set_shader(struct svga_context *svga,
 
    assert(type == SVGA3D_SHADERTYPE_VS ||
           type == SVGA3D_SHADERTYPE_GS ||
-          type == SVGA3D_SHADERTYPE_PS);
+          type == SVGA3D_SHADERTYPE_PS ||
+          type == SVGA3D_SHADERTYPE_HS ||
+          type == SVGA3D_SHADERTYPE_DS ||
+          type == SVGA3D_SHADERTYPE_CS);
 
    if (svga_have_gb_objects(svga)) {
       struct svga_winsys_gb_shader *gbshader =
@@ -468,30 +556,47 @@ svga_set_shader(struct svga_context *svga,
 
 
 struct svga_shader_variant *
-svga_new_shader_variant(struct svga_context *svga)
+svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
 {
-   svga->hud.num_shaders++;
-   return CALLOC_STRUCT(svga_shader_variant);
+   struct svga_shader_variant *variant;
+
+   switch (type) {
+   case PIPE_SHADER_FRAGMENT:
+      variant = CALLOC(1, sizeof(struct svga_fs_variant));
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      variant = CALLOC(1, sizeof(struct svga_gs_variant));
+      break;
+   case PIPE_SHADER_VERTEX:
+      variant = CALLOC(1, sizeof(struct svga_vs_variant));
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      variant = CALLOC(1, sizeof(struct svga_tes_variant));
+      break;
+   case PIPE_SHADER_TESS_CTRL:
+      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
+      break;
+   default:
+      return NULL;
+   }
+
+   if (variant) {
+      variant->type = svga_shader_type(type);
+      svga->hud.num_shaders++;
+   }
+   return variant;
 }
 
 
-enum pipe_error
+void
 svga_destroy_shader_variant(struct svga_context *svga,
-                            SVGA3dShaderType type,
                             struct svga_shader_variant *variant)
 {
-   enum pipe_error ret = PIPE_OK;
-
    if (svga_have_gb_objects(svga) && variant->gb_shader) {
       if (svga_have_vgpu10(svga)) {
          struct svga_winsys_context *swc = svga->swc;
          swc->shader_destroy(swc, variant->gb_shader);
-         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
-         if (ret != PIPE_OK) {
-            /* flush and try again */
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
-         }
+         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
       else {
@@ -502,23 +607,17 @@ svga_destroy_shader_variant(struct svga_context *svga,
    }
    else {
       if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         if (ret != PIPE_OK) {
-            /* flush and try again */
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-            assert(ret == PIPE_OK);
-         }
+         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
+                                               variant->type));
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
    }
 
+   FREE(variant->signature);
    FREE((unsigned *)variant->tokens);
    FREE(variant);
 
    svga->hud.num_shaders--;
-
-   return ret;
 }
 
 /*
@@ -546,6 +645,8 @@ svga_rebind_shaders(struct svga_context *svga)
       svga->rebind.flags.vs = 0;
       svga->rebind.flags.gs = 0;
       svga->rebind.flags.fs = 0;
+      svga->rebind.flags.tcs = 0;
+      svga->rebind.flags.tes = 0;
 
       return PIPE_OK;
    }
@@ -571,5 +672,19 @@ svga_rebind_shaders(struct svga_context *svga)
    }
    svga->rebind.flags.fs = 0;
 
+   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.tcs = 0;
+
+   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.tes = 0;
+
    return PIPE_OK;
 }