intel/fs: Drop the gl_program from fs_visitor
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_depth.c
index 06556dcba7a73ab8f8ca2c288904c276904ec04c..d5d5c5a786e95bc5cc0add9272e8f629356a0444 100644 (file)
@@ -359,11 +359,12 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
 
    z_swizzle = format_desc->swizzle[0];
 
-   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+   if (z_swizzle == PIPE_SWIZZLE_NONE)
       return FALSE;
 
    *width = format_desc->channel[z_swizzle].size;
-   *shift = format_desc->channel[z_swizzle].shift;
+   /* & 31 is for the same reason as the 32-bit limit above */
+   *shift = format_desc->channel[z_swizzle].shift & 31;
 
    if (*width == total_bits) {
       *mask = 0xffffffff;
@@ -389,7 +390,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
 
    s_swizzle = format_desc->swizzle[1];
 
-   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+   if (s_swizzle == PIPE_SWIZZLE_NONE)
       return FALSE;
 
    /* just special case 64bit d/s format */
@@ -836,7 +837,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
-   LLVMValueRef orig_mask = lp_build_mask_value(mask);
+   LLVMValueRef current_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
    boolean have_z, have_s;
 
@@ -872,8 +873,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       const unsigned z_swizzle = format_desc->swizzle[0];
       const unsigned s_swizzle = format_desc->swizzle[1];
 
-      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
-             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+      assert(z_swizzle != PIPE_SWIZZLE_NONE ||
+             s_swizzle != PIPE_SWIZZLE_NONE);
 
       assert(depth->enabled || stencil[0].enabled);
 
@@ -962,21 +963,49 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    if (stencil[0].enabled) {
 
       if (face) {
-         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
-
-         /* front_facing = face != 0 ? ~0 : 0 */
-         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
-         front_facing = LLVMBuildSExt(builder, front_facing,
-                                      LLVMIntTypeInContext(gallivm->context,
-                                             s_bld.type.length*s_bld.type.width),
-                                      "");
-         front_facing = LLVMBuildBitCast(builder, front_facing,
-                                         s_bld.int_vec_type, "");
-      }
+         if (0) {
+            /*
+             * XXX: the scalar expansion below produces atrocious code
+             * (basically producing a 64bit scalar value, then moving the 2
+             * 32bit pieces separately to simd, plus 4 shuffles, which is
+             * seriously lame). But the scalar-simd transitions are always
+             * tricky, so no big surprise there.
+             * This here would be way better, however llvm has some serious
+             * trouble later using it in the select, probably because it will
+             * recognize the expression as constant and move the simd value
+             * away (out of the loop) - and then it will suddenly try
+             * constructing i1 high-bit masks out of it later...
+             * (Try piglit stencil-twoside.)
+             * Note this is NOT due to using SExt/Trunc, it fails exactly the
+             * same even when using native compare/select.
+             * I cannot reproduce this problem when using stand-alone compiler
+             * though, suggesting some problem with optimization passes...
+             * (With stand-alone compilation, the construction of this mask
+             * value, no matter if the easy 3 instruction here or the complex
+             * 16+ one below, never gets separated from where it's used.)
+             * The scalar code still has the same problem, but the generated
+             * code looks a bit better at least for some reason, even if
+             * mostly by luck (the fundamental issue clearly is the same).
+             */
+            front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
+            /* front_facing = face != 0 ? ~0 : 0 */
+            front_facing = lp_build_compare(gallivm, s_bld.type,
+                                            PIPE_FUNC_NOTEQUAL,
+                                            front_facing, s_bld.zero);
+         } else {
+            LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
 
-      /* convert scalar stencil refs into vectors */
-      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
-      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
+            /* front_facing = face != 0 ? ~0 : 0 */
+            front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+            front_facing = LLVMBuildSExt(builder, front_facing,
+                                         LLVMIntTypeInContext(gallivm->context,
+                                                s_bld.type.length*s_bld.type.width),
+                                         "");
+            front_facing = LLVMBuildBitCast(builder, front_facing,
+                                            s_bld.int_vec_type, "");
+
+         }
+      }
 
       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                           stencil_refs, stencil_vals,
@@ -984,7 +1013,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
       /* apply stencil-fail operator */
       {
-         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             s_fail_mask, front_facing);
@@ -1032,6 +1061,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       /* compare src Z to dst Z, returning 'pass' mask */
       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
 
+      /* mask off bits that failed stencil test */
+      if (s_pass_mask) {
+         current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+      }
+
       if (!stencil[0].enabled) {
          /* We can potentially skip all remaining operations here, but only
           * if stencil is disabled because we still need to update the stencil
@@ -1041,25 +1075,19 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
          if (do_branch) {
             lp_build_mask_check(mask);
-            do_branch = FALSE;
          }
       }
 
       if (depth->writemask) {
-         LLVMValueRef zselectmask;
+         LLVMValueRef z_pass_mask;
 
          /* mask off bits that failed Z test */
-         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
-
-         /* mask off bits that failed stencil test */
-         if (s_pass_mask) {
-            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
-         }
+         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
 
          /* Mix the old and new Z buffer values.
-          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
+          * z_dst[i] = z_pass_mask[i] ? z_src[i] : z_dst[i]
           */
-         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
+         z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
       }
 
       if (stencil[0].enabled) {
@@ -1067,13 +1095,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          LLVMValueRef z_fail_mask, z_pass_mask;
 
          /* apply Z-fail operator */
-         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
+         z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             z_fail_mask, front_facing);
 
          /* apply Z-pass operator */
-         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
+         z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                             stencil_refs, stencil_vals,
                                             z_pass_mask, front_facing);
@@ -1083,7 +1111,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       /* No depth test: apply Z-pass operator to stencil buffer values which
        * passed the stencil test.
        */
-      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
+      s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
       stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                          stencil_refs, stencil_vals,
                                          s_pass_mask, front_facing);