radeonsi: always use Wave64 for HS/GS/VS shader stages (except GS fast launch)

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c

index 4befb3a1c80328503d33a4b3b7917e1aa731a160..8e453580b16e8a775d8579fda086d3ef2baefa41 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -34,7 +34,7 @@
  
  #include "pipe/p_defines.h"
  #include "pipe/p_state.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
  #include "util/u_math.h"
  #include "util/u_cpu_detect.h"
  #include "lp_bld_arit.h"
@@ -114,9 +114,9 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state,
     state->swizzle_a         = view->swizzle_a;
  
     state->target            = view->target;
-   state->pot_width         = util_is_power_of_two(texture->width0);
-   state->pot_height        = util_is_power_of_two(texture->height0);
-   state->pot_depth         = util_is_power_of_two(texture->depth0);
+   state->pot_width         = util_is_power_of_two_or_zero(texture->width0);
+   state->pot_height        = util_is_power_of_two_or_zero(texture->height0);
+   state->pot_depth         = util_is_power_of_two_or_zero(texture->depth0);
     state->level_zero_only   = !view->u.tex.last_level;
  
     /*
@@ -125,6 +125,41 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state,
      */
  }
  
+/**
+ * Initialize lp_sampler_static_texture_state object with the gallium
+ * texture/sampler_view state (this contains the parts which are
+ * considered static).
+ */
+void
+lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
+                                      const struct pipe_image_view *view)
+{
+   const struct pipe_resource *resource;
+
+   memset(state, 0, sizeof *state);
+
+   if (!view || !view->resource)
+      return;
+
+   resource = view->resource;
+
+   state->format            = view->format;
+   state->swizzle_r         = PIPE_SWIZZLE_X;
+   state->swizzle_g         = PIPE_SWIZZLE_Y;
+   state->swizzle_b         = PIPE_SWIZZLE_Z;
+   state->swizzle_a         = PIPE_SWIZZLE_W;
+
+   state->target            = view->resource->target;
+   state->pot_width         = util_is_power_of_two_or_zero(resource->width0);
+   state->pot_height        = util_is_power_of_two_or_zero(resource->height0);
+   state->pot_depth         = util_is_power_of_two_or_zero(resource->depth0);
+   state->level_zero_only   = 0;
+
+   /*
+    * the layer / element / level parameters are all either dynamic
+    * state or handled transparently wrt execution.
+    */
+}
  
  /**
   * Initialize lp_sampler_static_sampler_state object with the gallium sampler
@@ -144,7 +179,7 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
      * spurious recompiles, as the sampler static state is part of the shader
      * key.
      *
-    * Ideally the state tracker or cso_cache module would make all state
+    * Ideally gallium frontends or cso_cache module would make all state
      * canonical, but until that happens it's better to be safe than sorry here.
      *
      * XXX: Actually there's much more than can be done here, especially
@@ -156,19 +191,19 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
     state->wrap_r            = sampler->wrap_r;
     state->min_img_filter    = sampler->min_img_filter;
     state->mag_img_filter    = sampler->mag_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
     state->seamless_cube_map = sampler->seamless_cube_map;
  
     if (sampler->max_lod > 0.0f) {
-      state->min_mip_filter = sampler->min_mip_filter;
-   } else {
-      state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      state->max_lod_pos = 1;
+   }
+
+   if (sampler->lod_bias != 0.0f) {
+      state->lod_bias_non_zero = 1;
     }
  
     if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
         state->min_img_filter != state->mag_img_filter) {
-      if (sampler->lod_bias != 0.0f) {
-         state->lod_bias_non_zero = 1;
-      }
  
        /* If min_lod == max_lod we can greatly simplify mipmap selection.
         * This is a case that occurs during automatic mipmap generation.
@@ -234,7 +269,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
     unsigned length = coord_bld->type.length;
     unsigned num_quads = length / 4;
     boolean rho_per_quad = rho_bld->type.length != length;
-   boolean no_rho_opt = (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1);
+   boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
     unsigned i;
     LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
     LLVMValueRef rho_xvec, rho_yvec;
@@ -275,7 +310,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
        rho = lp_build_mul(rho_bld, cubesize, rho);
     }
     else if (derivs) {
-      LLVMValueRef ddmax[3], ddx[3], ddy[3];
+      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
        for (i = 0; i < dims; i++) {
           LLVMValueRef floatdim;
           LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
@@ -580,10 +615,8 @@ lp_build_brilinear_lod(struct lp_build_context *bld,
  
     lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
  
-   lod_fpart = lp_build_mul(bld, lod_fpart,
-                            lp_build_const_vec(bld->gallivm, bld->type, factor));
-
-   lod_fpart = lp_build_add(bld, lod_fpart,
+   lod_fpart = lp_build_mad(bld, lod_fpart,
+                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                              lp_build_const_vec(bld->gallivm, bld->type, post_offset));
  
     /*
@@ -639,10 +672,8 @@ lp_build_brilinear_rho(struct lp_build_context *bld,
     /* fpart = rho / 2**ipart */
     lod_fpart = lp_build_extract_mantissa(bld, rho);
  
-   lod_fpart = lp_build_mul(bld, lod_fpart,
-                            lp_build_const_vec(bld->gallivm, bld->type, factor));
-
-   lod_fpart = lp_build_add(bld, lod_fpart,
+   lod_fpart = lp_build_mad(bld, lod_fpart,
+                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                              lp_build_const_vec(bld->gallivm, bld->type, post_offset));
  
     /*
@@ -698,6 +729,7 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld,
   */
  void
  lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      boolean is_lodq,
                        unsigned texture_unit,
                        unsigned sampler_unit,
                        LLVMValueRef s,
@@ -708,6 +740,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
                        unsigned mip_filter,
+                      LLVMValueRef *out_lod,
                        LLVMValueRef *out_lod_ipart,
                        LLVMValueRef *out_lod_fpart,
                        LLVMValueRef *out_lod_positive)
@@ -740,7 +773,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
      * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
      */
  
-   if (bld->static_sampler_state->min_max_lod_equal) {
+   if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
        /* User is forcing sampling from a particular mipmap level.
         * This is hit during mipmap generation.
         */
@@ -760,7 +793,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
        }
        else {
           LLVMValueRef rho;
-         boolean rho_squared = ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+         boolean rho_squared = (bld->no_rho_approx &&
                                  (bld->dims > 1)) || cube_rho;
  
           rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
@@ -769,7 +802,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
            * Compute lod = log2(rho)
            */
  
-         if (!lod_bias &&
+         if (!lod_bias && !is_lodq &&
               !bld->static_sampler_state->lod_bias_non_zero &&
               !bld->static_sampler_state->apply_max_lod &&
               !bld->static_sampler_state->apply_min_lod) {
@@ -796,8 +829,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                 return;
              }
              if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
-                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR) &&
-                !rho_squared) {
+                !bld->no_brilinear && !rho_squared) {
                 /*
                  * This can't work if rho is squared. Not sure if it could be
                  * fixed while keeping it worthwile, could also do sqrt here
@@ -843,6 +875,10 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
           lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
        }
  
+      if (is_lodq) {
+         *out_lod = lod;
+      }
+
        /* clamp lod */
        if (bld->static_sampler_state->apply_max_lod) {
           LLVMValueRef max_lod =
@@ -860,13 +896,18 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
  
           lod = lp_build_max(lodf_bld, lod, min_lod);
        }
+
+      if (is_lodq) {
+         *out_lod_fpart = lod;
+         return;
+      }
     }
  
     *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                      lod, lodf_bld->zero);
  
     if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+      if (!bld->no_brilinear) {
           lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                  out_lod_ipart, out_lod_fpart);
        }
@@ -1416,8 +1457,8 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
  {
     const unsigned dims = bld->dims;
     LLVMValueRef width;
-   LLVMValueRef height;
-   LLVMValueRef depth;
+   LLVMValueRef height = NULL;
+   LLVMValueRef depth = NULL;
  
     lp_build_extract_image_sizes(bld,
                                  &bld->float_size_bld,
@@ -1683,9 +1724,7 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
     maxasat = lp_build_max(coord_bld, as, at);
     ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
  
-   if (need_derivs && (derivs_in ||
-       ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
-        (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
+   if (need_derivs && (derivs_in || (bld->no_quad_lod && bld->no_rho_approx))) {
        /*
         * XXX: This is really really complex.
         * It is a bit overkill to use this for implicit derivatives as well,