+ else {
+ assert(!z_src_type.sign);
+ assert(z_src_type.norm);
+ }
+
+ /* Pick the type matching the depth-stencil format. */
+ z_type = lp_depth_type(format_desc, z_src_type.length);
+
+ /* Pick the intermediate type for depth operations. */
+ z_type.width = z_src_type.width;
+ assert(z_type.length == z_src_type.length);
+
+ /* FIXME: for non-float depth/stencil we might generate better code
+ * if we always split it up to use 128-bit operations.
+ * For stencil we'd almost certainly want to pack to 8xi16 values;
+ * for Z, just run twice.
+ */
+
+ /* Sanity checking */
+ {
+ const unsigned z_swizzle = format_desc->swizzle[0];
+ const unsigned s_swizzle = format_desc->swizzle[1];
+
+ assert(z_swizzle != PIPE_SWIZZLE_NONE ||
+ s_swizzle != PIPE_SWIZZLE_NONE);
+
+ assert(depth->enabled || stencil[0].enabled);
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ if (stencil[0].enabled) {
+ assert(s_swizzle < 4);
+ assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ assert(format_desc->channel[s_swizzle].pure_integer);
+ assert(!format_desc->channel[s_swizzle].normalized);
+ assert(format_desc->channel[s_swizzle].size == 8);
+ }
+
+ if (depth->enabled) {
+ assert(z_swizzle < 4);
+ if (z_type.floating) {
+ assert(z_swizzle == 0);
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_FLOAT);
+ assert(format_desc->channel[z_swizzle].size == 32);
+ }
+ else {
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_UNSIGNED);
+ assert(format_desc->channel[z_swizzle].normalized);
+ assert(!z_type.fixed);
+ }
+ }
+ }
+
+
+ /* Setup build context for Z vals */
+ lp_build_context_init(&z_bld, gallivm, z_type);
+
+ /* Setup build context for stencil vals */
+ s_type = lp_int_type(z_type);
+ lp_build_context_init(&s_bld, gallivm, s_type);
+
+ /* Compute and apply the Z/stencil bitmasks and shifts.
+ */
+ {
+ unsigned s_shift, s_mask;
+
+ z_dst = z_fb;
+ stencil_vals = s_fb;
+
+ have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+ have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+ if (have_z) {
+ if (z_mask != 0xffffffff) {
+ z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
+ }
+
+ /*
+ * Align the framebuffer Z's LSB to the right.
+ */
+ if (z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
+ z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
+ } else if (z_bitmask) {
+ z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
+ } else {
+ lp_build_name(z_dst, "z_dst");
+ }
+ }
+
+ if (have_s) {
+ if (s_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
+ stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
+ stencil_shift = shift; /* used below */
+ }
+
+ if (s_mask != 0xffffffff) {
+ LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
+ stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
+ }
+
+ lp_build_name(stencil_vals, "s_dst");
+ }
+ }
+
+ if (stencil[0].enabled) {
+
+ if (face) {
+ if (0) {
+ /*
+ * XXX: the scalar expansion below produces atrocious code
+ * (basically producing a 64bit scalar value, then moving the 2
+ * 32bit pieces separately to simd, plus 4 shuffles, which is
+ * seriously lame). But the scalar-simd transitions are always
+ * tricky, so no big surprise there.
+ * This here would be way better, however llvm has some serious
+ * trouble later using it in the select, probably because it will
+ * recognize the expression as constant and move the simd value
+ * away (out of the loop) - and then it will suddenly try
+ * constructing i1 high-bit masks out of it later...
+ * (Try piglit stencil-twoside.)
+ * Note this is NOT due to using SExt/Trunc, it fails exactly the
+ * same even when using native compare/select.
+ * I cannot reproduce this problem when using stand-alone compiler
+ * though, suggesting some problem with optimization passes...
+ * (With stand-alone compilation, the construction of this mask
+ * value, no matter if the easy 3 instruction here or the complex
+ * 16+ one below, never gets separated from where it's used.)
+ * The scalar code still has the same problem, but the generated
+ * code looks a bit better at least for some reason, even if
+ * mostly by luck (the fundamental issue clearly is the same).
+ */
+ front_facing = lp_build_broadcast(gallivm, s_bld.vec_type, face);
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = lp_build_compare(gallivm, s_bld.type,
+ PIPE_FUNC_NOTEQUAL,
+ front_facing, s_bld.zero);
+ } else {
+ LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntTypeInContext(gallivm->context,
+ s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+
+ }
+ }
+
+ s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
+ stencil_refs, stencil_vals,
+ front_facing);
+
+ /* apply stencil-fail operator */
+ {
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
+ stencil_refs, stencil_vals,
+ s_fail_mask, front_facing);
+ }
+ }
+
+ if (depth->enabled) {
+ /*
+ * Convert fragment Z to the desired type, aligning the LSB to the right.
+ */
+
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
+ assert(lp_check_value(z_src_type, z_src));
+ if (z_src_type.floating) {
+ /*
+ * Convert from floating point values
+ */
+
+ if (!z_type.floating) {
+ z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
+ z_src_type,
+ z_width,
+ z_src);
+ }
+ } else {
+ /*
+ * Convert from unsigned normalized values.
+ */
+
+ assert(!z_src_type.sign);
+ assert(!z_src_type.fixed);
+ assert(z_src_type.norm);
+ assert(!z_type.floating);
+ if (z_src_type.width > z_width) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
+ z_src_type.width - z_width);
+ z_src = LLVMBuildLShr(builder, z_src, shift, "");
+ }
+ }
+ assert(lp_check_value(z_type, z_src));
+
+ lp_build_name(z_src, "z_src");
+
+ /* compare src Z to dst Z, returning 'pass' mask */
+ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
+
+ /* mask off bits that failed stencil test */
+ if (s_pass_mask) {
+ current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ }
+
+ if (!stencil[0].enabled) {
+ /* We can potentially skip all remaining operations here, but only
+ * if stencil is disabled; otherwise we still need to update the
+ * stencil buffer values. Z buffer values don't need updating.
+ */
+ lp_build_mask_update(mask, z_pass);
+
+ if (do_branch) {
+ lp_build_mask_check(mask);
+ }
+ }
+
+ if (depth->writemask) {
+ LLVMValueRef z_pass_mask;
+
+ /* mask off bits that failed Z test */
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
+
+ /* Mix the old and new Z buffer values.
+ * z_dst[i] = z_pass_mask[i] ? z_src[i] : z_dst[i]
+ */
+ z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
+ }
+
+ if (stencil[0].enabled) {
+ /* update stencil buffer values according to z pass/fail result */
+ LLVMValueRef z_fail_mask, z_pass_mask;
+
+ /* apply Z-fail operator */
+ z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
+ stencil_refs, stencil_vals,
+ z_fail_mask, front_facing);
+
+ /* apply Z-pass operator */
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ z_pass_mask, front_facing);
+ }
+ }
+ else {
+ /* No depth test: apply Z-pass operator to stencil buffer values which
+ * passed the stencil test.
+ */
+ s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ s_pass_mask, front_facing);
+ }
+
+ /* Put Z and stencil bits in the right place */
+ if (have_z && z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
+ z_dst = LLVMBuildShl(builder, z_dst, shift, "");
+ }
+ if (stencil_vals && stencil_shift)
+ stencil_vals = LLVMBuildShl(builder, stencil_vals,
+ stencil_shift, "");
+
+ /* Finally, merge the z/stencil values */
+ if (format_desc->block.bits <= 32) {
+ if (have_z && have_s)
+ *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+ else if (have_z)
+ *z_value = z_dst;
+ else
+ *z_value = stencil_vals;
+ *s_value = *z_value;
+ }
+ else {
+ *z_value = z_dst;
+ *s_value = stencil_vals;
+ }
+
+ if (s_pass_mask)
+ lp_build_mask_update(mask, s_pass_mask);
+
+ if (depth->enabled && stencil[0].enabled)
+ lp_build_mask_update(mask, z_pass);