svga: Performance fixes
[mesa.git] / src / gallium / drivers / svga / svga_shader.c
index 5c99e16d97659fc5f2ad44623c55b1e3f88b94e3..52f1153fd61082590cda36ca2e8965adcbd2914b 100644 (file)
 
 #include "util/u_bitmask.h"
 #include "util/u_memory.h"
+#include "util/format/u_format.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
 #include "svga_format.h"
 #include "svga_shader.h"
+#include "svga_resource_texture.h"
 
 
 /**
@@ -160,58 +162,184 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
    return remap_table[generic_index];
 }
 
+static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_Z,
+   PIPE_SWIZZLE_W,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_Z,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
+static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_X,
+   PIPE_SWIZZLE_Y,
+   PIPE_SWIZZLE_0,
+   PIPE_SWIZZLE_1,
+   PIPE_SWIZZLE_NONE
+};
+
 
 /**
  * Initialize the shader-neutral fields of svga_compile_key from context
  * state.  This is basically the texture-related state.
  */
 void
-svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
+svga_init_shader_key_common(const struct svga_context *svga,
+                            enum pipe_shader_type shader_type,
+                            const struct svga_shader *shader,
                             struct svga_compile_key *key)
 {
    unsigned i, idx = 0;
 
-   assert(shader < Elements(svga->curr.num_sampler_views));
+   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
+
+   /* In case the number of samplers and sampler_views doesn't match,
+    * loop over the lower of the two counts.
+    */
+   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
+                            svga->curr.num_samplers[shader_type]);
+
+   for (i = 0; i < key->num_textures; i++) {
+      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
+      const struct svga_sampler_state
+         *sampler = svga->curr.sampler[shader_type][i];
 
-   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
-      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
       if (view) {
-         assert(svga->curr.sampler[shader][i]);
          assert(view->texture);
          assert(view->texture->target < (1 << 4)); /* texture_target:4 */
 
-         key->tex[i].texture_target = view->texture->target;
-
-         /* 1D/2D array textures with one slice are treated as non-arrays
-          * by the SVGA3D device.  Convert the texture type here so that
-          * we emit the right TEX/SAMPLE instruction in the shader.
+         /* 1D/2D array textures with one slice and cube map array textures
+          * with one cube are treated as non-arrays by the SVGA3D device.
+          * Set the is_array flag only if we know that we have more than 1
+          * element.  This will be used to select shader instruction/resource
+          * types during shader translation.
           */
-         if (view->texture->array_size == 1) {
-            if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
-               key->tex[i].texture_target = PIPE_TEXTURE_1D;
-            }
-            else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
-               key->tex[i].texture_target = PIPE_TEXTURE_2D;
-            }
+         switch (view->texture->target) {
+         case PIPE_TEXTURE_1D_ARRAY:
+         case PIPE_TEXTURE_2D_ARRAY:
+            key->tex[i].is_array = view->texture->array_size > 1;
+            break;
+         case PIPE_TEXTURE_CUBE_ARRAY:
+            key->tex[i].is_array = view->texture->array_size > 6;
+            break;
+         default:
+            ; /* nothing / silence compiler warning */
          }
 
-         key->tex[i].texture_msaa = view->texture->nr_samples > 1;
-         if (!svga->curr.sampler[shader][i]->normalized_coords) {
-            assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
-            key->tex[i].width_height_idx = idx++;
-            key->tex[i].unnormalized = TRUE;
-            ++key->num_unnormalized_coords;
+         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
+         key->tex[i].num_samples = view->texture->nr_samples;
+
+         const enum pipe_swizzle *swizzle_tab;
+         if (view->texture->target == PIPE_BUFFER) {
+            SVGA3dSurfaceFormat svga_format;
+            unsigned tf_flags;
+
+            /* Apply any special swizzle mask for the view format if needed */
+
+            svga_translate_texture_buffer_view_format(view->format,
+                                                      &svga_format, &tf_flags);
+            if (tf_flags & TF_000X)
+               swizzle_tab = set_000X;
+            else if (tf_flags & TF_XXXX)
+               swizzle_tab = set_XXXX;
+            else if (tf_flags & TF_XXX1)
+               swizzle_tab = set_XXX1;
+            else if (tf_flags & TF_XXXY)
+               swizzle_tab = set_XXXY;
+            else
+               swizzle_tab = copy_alpha;
+         }
+         else {
+            /* If we have a non-alpha view into an svga3d surface with an
+             * alpha channel, then explicitly set the alpha channel to 1
+             * when sampling. Note that we need to check the
+             * actual device format to cover also imported surface cases.
+             */
+            swizzle_tab =
+               (!util_format_has_alpha(view->format) &&
+                svga_texture_device_format_has_alpha(view->texture)) ?
+                set_alpha : copy_alpha;
+
+            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
+                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
+               swizzle_tab = set_alpha;
+
+            /* Save the compare function as we need to handle
+             * depth compare in the shader.
+             */
+            key->tex[i].compare_mode = sampler->compare_mode;
+            key->tex[i].compare_func = sampler->compare_func;
          }
 
-         key->tex[i].swizzle_r = view->swizzle_r;
-         key->tex[i].swizzle_g = view->swizzle_g;
-         key->tex[i].swizzle_b = view->swizzle_b;
-         key->tex[i].swizzle_a = view->swizzle_a;
+         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
+         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
+         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
+         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
+      }
 
-         key->tex[i].return_type = svga_get_texture_datatype(view->format);
+      if (sampler) {
+         if (!sampler->normalized_coords) {
+            if (view) {
+               assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
+               key->tex[i].width_height_idx = idx++;
+           }
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+
+            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
+                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
+                key->tex[i].texel_bias = TRUE;
+            }
+         }
       }
    }
-   key->num_textures = svga->curr.num_sampler_views[shader];
+
+   key->clamp_vertex_color = svga->curr.rast ?
+                             svga->curr.rast->templ.clamp_vertex_color : 0;
 }
 
 
@@ -252,7 +380,6 @@ svga_search_shader_token_key(struct svga_shader *pshader,
  */
 static enum pipe_error
 define_gb_shader_vgpu9(struct svga_context *svga,
-                       SVGA3dShaderType type,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
 {
@@ -264,9 +391,11 @@ define_gb_shader_vgpu9(struct svga_context *svga,
     * Kernel module will allocate an id for the shader and issue
     * the DefineGBShader command.
     */
-   variant->gb_shader = sws->shader_create(sws, type,
+   variant->gb_shader = sws->shader_create(sws, variant->type,
                                            variant->tokens, codeLen);
 
+   svga->hud.shader_mem_used += codeLen;
+
    if (!variant->gb_shader)
       return PIPE_ERROR_OUT_OF_MEMORY;
 
@@ -280,12 +409,12 @@ define_gb_shader_vgpu9(struct svga_context *svga,
  */
 static enum pipe_error
 define_gb_shader_vgpu10(struct svga_context *svga,
-                        SVGA3dShaderType type,
                         struct svga_shader_variant *variant,
                         unsigned codeLen)
 {
    struct svga_winsys_context *swc = svga->swc;
    enum pipe_error ret;
+   unsigned len = codeLen + variant->signatureLen;
 
    /**
     * Shaders in VGPU10 enabled device reside in the device COTable.
@@ -299,8 +428,12 @@ define_gb_shader_vgpu10(struct svga_context *svga,
 
    /* Create gb memory for the shader and upload the shader code */
    variant->gb_shader = swc->shader_create(swc,
-                                           variant->id, type,
-                                           variant->tokens, codeLen);
+                                           variant->id, variant->type,
+                                           variant->tokens, codeLen,
+                                           variant->signature,
+                                           variant->signatureLen);
+
+   svga->hud.shader_mem_used += len;
 
    if (!variant->gb_shader) {
       /* Free the shader ID */
@@ -317,7 +450,8 @@ define_gb_shader_vgpu10(struct svga_context *svga,
     * the shader creation and return an error.
     */
    ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
-                                           variant->id, type, codeLen);
+                                           variant->id, variant->type,
+                                           len);
 
    if (ret != PIPE_OK)
       goto fail;
@@ -342,31 +476,33 @@ fail_no_allocation:
  */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
-                   SVGA3dShaderType type,
                    struct svga_shader_variant *variant)
 {
    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
    enum pipe_error ret;
 
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
+
    variant->id = UTIL_BITMASK_INVALID_INDEX;
 
    if (svga_have_gb_objects(svga)) {
       if (svga_have_vgpu10(svga))
-         return define_gb_shader_vgpu10(svga, type, variant, codeLen);
+         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
       else
-         return define_gb_shader_vgpu9(svga, type, variant, codeLen);
+         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
    }
    else {
       /* Allocate an integer ID for the shader */
       variant->id = util_bitmask_add(svga->shader_id_bm);
       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto done;
       }
 
       /* Issue SVGA3D device command to define the shader */
       ret = SVGA3D_DefineShader(svga->swc,
                                 variant->id,
-                                type,
+                                variant->type,
                                 variant->tokens,
                                 codeLen);
       if (ret != PIPE_OK) {
@@ -377,6 +513,8 @@ svga_define_shader(struct svga_context *svga,
       }
    }
 
+done:
+   SVGA_STATS_TIME_POP(svga_sws(svga));
    return ret;
 }
 
@@ -395,7 +533,10 @@ svga_set_shader(struct svga_context *svga,
 
    assert(type == SVGA3D_SHADERTYPE_VS ||
           type == SVGA3D_SHADERTYPE_GS ||
-          type == SVGA3D_SHADERTYPE_PS);
+          type == SVGA3D_SHADERTYPE_PS ||
+          type == SVGA3D_SHADERTYPE_HS ||
+          type == SVGA3D_SHADERTYPE_DS ||
+          type == SVGA3D_SHADERTYPE_CS);
 
    if (svga_have_gb_objects(svga)) {
       struct svga_winsys_gb_shader *gbshader =
@@ -415,30 +556,47 @@ svga_set_shader(struct svga_context *svga,
 
 
 struct svga_shader_variant *
-svga_new_shader_variant(struct svga_context *svga)
+svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
 {
-   svga->hud.num_shaders++;
-   return CALLOC_STRUCT(svga_shader_variant);
+   struct svga_shader_variant *variant;
+
+   switch (type) {
+   case PIPE_SHADER_FRAGMENT:
+      variant = CALLOC(1, sizeof(struct svga_fs_variant));
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      variant = CALLOC(1, sizeof(struct svga_gs_variant));
+      break;
+   case PIPE_SHADER_VERTEX:
+      variant = CALLOC(1, sizeof(struct svga_vs_variant));
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      variant = CALLOC(1, sizeof(struct svga_tes_variant));
+      break;
+   case PIPE_SHADER_TESS_CTRL:
+      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
+      break;
+   default:
+      return NULL;
+   }
+
+   if (variant) {
+      variant->type = svga_shader_type(type);
+      svga->hud.num_shaders++;
+   }
+   return variant;
 }
 
 
-enum pipe_error
+void
 svga_destroy_shader_variant(struct svga_context *svga,
-                            SVGA3dShaderType type,
                             struct svga_shader_variant *variant)
 {
-   enum pipe_error ret = PIPE_OK;
-
    if (svga_have_gb_objects(svga) && variant->gb_shader) {
       if (svga_have_vgpu10(svga)) {
          struct svga_winsys_context *swc = svga->swc;
          swc->shader_destroy(swc, variant->gb_shader);
-         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
-         if (ret != PIPE_OK) {
-            /* flush and try again */
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
-         }
+         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
       else {
@@ -449,23 +607,17 @@ svga_destroy_shader_variant(struct svga_context *svga,
    }
    else {
       if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
-         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-         if (ret != PIPE_OK) {
-            /* flush and try again */
-            svga_context_flush(svga, NULL);
-            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
-            assert(ret == PIPE_OK);
-         }
+         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
+                                               variant->type));
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
    }
 
+   FREE(variant->signature);
    FREE((unsigned *)variant->tokens);
    FREE(variant);
 
    svga->hud.num_shaders--;
-
-   return ret;
 }
 
 /*
@@ -493,6 +645,8 @@ svga_rebind_shaders(struct svga_context *svga)
       svga->rebind.flags.vs = 0;
       svga->rebind.flags.gs = 0;
       svga->rebind.flags.fs = 0;
+      svga->rebind.flags.tcs = 0;
+      svga->rebind.flags.tes = 0;
 
       return PIPE_OK;
    }
@@ -518,5 +672,19 @@ svga_rebind_shaders(struct svga_context *svga)
    }
    svga->rebind.flags.fs = 0;
 
+   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.tcs = 0;
+
+   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
+      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   svga->rebind.flags.tes = 0;
+
    return PIPE_OK;
 }