gallium: add GREMEDY_string_marker
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_depth.c
index 5ef99473e4dc297ead98dd5e01d5f233cf29c97c..b25e041375070a4bdc2209b2c2a722a723775bb1 100644 (file)
@@ -349,8 +349,6 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
 {
    unsigned total_bits;
    unsigned z_swizzle;
-   unsigned chan;
-   unsigned padding_left, padding_right;
 
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    assert(format_desc->block.width == 1);
@@ -365,25 +363,15 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
       return FALSE;
 
    *width = format_desc->channel[z_swizzle].size;
+   /* & 31 is for the same reason as the 32-bit limit above */
+   *shift = format_desc->channel[z_swizzle].shift & 31;
 
-   padding_right = 0;
-   for (chan = 0; chan < z_swizzle; ++chan)
-      padding_right += format_desc->channel[chan].size;
-
-   padding_left =
-      total_bits - (padding_right + *width);
-
-   if (padding_left || padding_right) {
-      unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
-      unsigned long long mask_right = (1ULL << (padding_right)) - 1;
-      *mask = mask_left ^ mask_right;
-   }
-   else {
+   if (*width == total_bits) {
       *mask = 0xffffffff;
+   } else {
+      *mask = ((1 << *width) - 1) << *shift;
    }
 
-   *shift = padding_right;
-
    return TRUE;
 }
 
@@ -398,7 +386,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
                      unsigned *shift, unsigned *mask)
 {
    unsigned s_swizzle;
-   unsigned chan, sz;
+   unsigned sz;
 
    s_swizzle = format_desc->swizzle[1];
 
@@ -407,16 +395,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
 
    /* just special case 64bit d/s format */
    if (format_desc->block.bits > 32) {
+      /* XXX big-endian? */
       assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
       *shift = 0;
       *mask = 0xff;
       return TRUE;
    }
 
-   *shift = 0;
-   for (chan = 0; chan < s_swizzle; chan++)
-      *shift += format_desc->channel[chan].size;
-
+   *shift = format_desc->channel[s_swizzle].shift;
    sz = format_desc->channel[s_swizzle].size;
    *mask = (1U << sz) - 1U;
 
@@ -429,6 +415,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
  * Test the depth mask. Add the number of channels which have a non-zero mask
  * to the occlusion counter, e.g. if maskvalue is {-1, -1, -1, -1},
  * the counter is incremented by 4.
+ * TODO: could get that out of the fs loop.
  *
  * \param type holds element type of the mask vector.
  * \param maskvalue is the depth test mask.
@@ -457,6 +444,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
                                       LLVMInt32TypeInContext(context), bits);
       count = lp_build_intrinsic_unary(builder, popcntintr,
                                        LLVMInt32TypeInContext(context), bits);
+      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
    }
    else if(util_cpu_caps.has_avx && type.length == 8) {
       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
@@ -467,6 +455,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
                                       LLVMInt32TypeInContext(context), bits);
       count = lp_build_intrinsic_unary(builder, popcntintr,
                                        LLVMInt32TypeInContext(context), bits);
+      count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
    }
    else {
       unsigned i;
@@ -509,8 +498,11 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
        }
        count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
 
-       if (type.length > 4) {
-          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
+       if (type.length > 8) {
+          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
+       }
+       else if (type.length < 8) {
+          count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
        }
    }
    newcount = LLVMBuildLoad(builder, counter, "origcount");
@@ -525,6 +517,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
+ * \param is_1d  whether this resource has only one dimension
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
@@ -535,6 +528,7 @@ void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
+                                     boolean is_1d,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef *z_fb,
@@ -592,9 +586,14 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
-   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
-   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   if (is_1d) {
+      zs_dst2 = lp_build_undef(gallivm, zs_load_type);
+   }
+   else {
+      zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+      zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   }
 
    *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                   LLVMConstVector(shuffles, zs_type.length), "");
@@ -648,6 +647,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
+ * \param is_1d  whether this resource has only one dimension
  * \param mask  the alive/dead pixel mask for the quad (vector)
  * \param z_fb  z values read from fb (with padding)
  * \param s_fb  s values read from fb (with padding)
@@ -661,6 +661,7 @@ void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
+                                      boolean is_1d,
                                       struct lp_build_mask_context *mask,
                                       LLVMValueRef z_fb,
                                       LLVMValueRef s_fb,
@@ -791,7 +792,9 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
    }
 
    LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
-   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+   if (!is_1d) {
+      LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+   }
 }
 
 /**
@@ -834,7 +837,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
-   LLVMValueRef orig_mask = lp_build_mask_value(mask);
+   LLVMValueRef current_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
    boolean have_z, have_s;
 
@@ -972,17 +975,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                                          s_bld.int_vec_type, "");
       }
 
-      /* convert scalar stencil refs into vectors */
-      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
-      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
-
       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                           stencil_refs, stencil_vals,
                                           front_facing);
 
       /* apply stencil-fail operator */
       {
-         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             s_fail_mask, front_facing);
@@ -1030,6 +1029,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       /* compare src Z to dst Z, returning 'pass' mask */
       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
 
+      /* mask off bits that failed stencil test */
+      if (s_pass_mask) {
+         current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+      }
+
       if (!stencil[0].enabled) {
          /* We can potentially skip all remaining operations here, but only
           * if stencil is disabled because we still need to update the stencil
@@ -1039,25 +1043,19 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
          if (do_branch) {
             lp_build_mask_check(mask);
-            do_branch = FALSE;
          }
       }
 
       if (depth->writemask) {
-         LLVMValueRef zselectmask;
+         LLVMValueRef z_pass_mask;
 
          /* mask off bits that failed Z test */
-         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
-
-         /* mask off bits that failed stencil test */
-         if (s_pass_mask) {
-            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
-         }
+         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
 
          /* Mix the old and new Z buffer values.
           * z_dst[i] = z_pass_mask[i] ? z_src[i] : z_dst[i]
           */
-         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
+         z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
       }
 
       if (stencil[0].enabled) {
@@ -1065,13 +1063,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          LLVMValueRef z_fail_mask, z_pass_mask;
 
          /* apply Z-fail operator */
-         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
+         z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             z_fail_mask, front_facing);
 
          /* apply Z-pass operator */
-         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
+         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                             stencil_refs, stencil_vals,
                                             z_pass_mask, front_facing);
@@ -1081,7 +1079,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       /* No depth test: apply Z-pass operator to stencil buffer values which
        * passed the stencil test.
        */
-      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
+      s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
       stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                          stencil_refs, stencil_vals,
                                          s_pass_mask, front_facing);
@@ -1116,9 +1114,5 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
    if (depth->enabled && stencil[0].enabled)
       lp_build_mask_update(mask, z_pass);
-
-   if (do_branch)
-      lp_build_mask_check(mask);
-
 }