z_swizzle = format_desc->swizzle[0];
- if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ if (z_swizzle == PIPE_SWIZZLE_NONE)
return FALSE;
*width = format_desc->channel[z_swizzle].size;
- *shift = format_desc->channel[z_swizzle].shift;
+ /* & 31 is for the same reason as the 32-bit limit above */
+ *shift = format_desc->channel[z_swizzle].shift & 31;
if (*width == total_bits) {
*mask = 0xffffffff;
s_swizzle = format_desc->swizzle[1];
- if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ if (s_swizzle == PIPE_SWIZZLE_NONE)
return FALSE;
/* just special case 64bit d/s format */
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
- LLVMValueRef orig_mask = lp_build_mask_value(mask);
+ LLVMValueRef current_mask = lp_build_mask_value(mask);
LLVMValueRef front_facing = NULL;
boolean have_z, have_s;
const unsigned z_swizzle = format_desc->swizzle[0];
const unsigned s_swizzle = format_desc->swizzle[1];
- assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
- s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+ assert(z_swizzle != PIPE_SWIZZLE_NONE ||
+ s_swizzle != PIPE_SWIZZLE_NONE);
assert(depth->enabled || stencil[0].enabled);
if (stencil[0].enabled) {
if (face) {
- LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
-
- /* front_facing = face != 0 ? ~0 : 0 */
- front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
- front_facing = LLVMBuildSExt(builder, front_facing,
- LLVMIntTypeInContext(gallivm->context,
- s_bld.type.length*s_bld.type.width),
- "");
- front_facing = LLVMBuildBitCast(builder, front_facing,
- s_bld.int_vec_type, "");
- }
+ if (0) {
+ /*
+ * XXX: the scalar expansion below produces atrocious code
+ * (basically producing a 64bit scalar value, then moving the 2
+ * 32bit pieces separately to simd, plus 4 shuffles, which is
+ * seriously lame). But the scalar-simd transitions are always
+ * tricky, so no big surprise there.
+ * This here would be way better, however llvm has some serious
+ * trouble later using it in the select, probably because it will
+ * recognize the expression as constant and move the simd value
+ * away (out of the loop) - and then it will suddenly try
+ * constructing i1 high-bit masks out of it later...
+ * (Try piglit stencil-twoside.)
+ * Note this is NOT due to using SExt/Trunc, it fails exactly the
+ * same even when using native compare/select.
+ * I cannot reproduce this problem when using stand-alone compiler
+ * though, suggesting some problem with optimization passes...
+ * (With stand-alone compilation, the construction of this mask
+ * value, no matter if the easy 3 instruction here or the complex
+ * 16+ one below, never gets separated from where it's used.)
+ * The scalar code still has the same problem, but the generated
+ * code looks a bit better at least for some reason, even if
+ * mostly by luck (the fundamental issue clearly is the same).
+ */
+ front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = lp_build_compare(gallivm, s_bld.type,
+ PIPE_FUNC_NOTEQUAL,
+ front_facing, s_bld.zero);
+ } else {
+ LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
- /* convert scalar stencil refs into vectors */
- stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
- stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntTypeInContext(gallivm->context,
+ s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+
+ }
+ }
s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
stencil_refs, stencil_vals,
/* apply stencil-fail operator */
{
- LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
stencil_refs, stencil_vals,
s_fail_mask, front_facing);
/* compare src Z to dst Z, returning 'pass' mask */
z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
+ /* mask off bits that failed stencil test */
+ if (s_pass_mask) {
+ current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ }
+
if (!stencil[0].enabled) {
/* We can potentially skip all remaining operations here, but only
* if stencil is disabled because we still need to update the stencil
if (do_branch) {
lp_build_mask_check(mask);
- do_branch = FALSE;
}
}
if (depth->writemask) {
- LLVMValueRef zselectmask;
+ LLVMValueRef z_pass_mask;
/* mask off bits that failed Z test */
- zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
-
- /* mask off bits that failed stencil test */
- if (s_pass_mask) {
- zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
- }
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
/* Mix the old and new Z buffer values.
* z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
*/
- z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
+ z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
}
if (stencil[0].enabled) {
LLVMValueRef z_fail_mask, z_pass_mask;
/* apply Z-fail operator */
- z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
+ z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
stencil_refs, stencil_vals,
z_fail_mask, front_facing);
/* apply Z-pass operator */
- z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
z_pass_mask, front_facing);
/* No depth test: apply Z-pass operator to stencil buffer values which
* passed the stencil test.
*/
- s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
+ s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
s_pass_mask, front_facing);