{
unsigned total_bits;
unsigned z_swizzle;
- unsigned chan;
- unsigned padding_left, padding_right;
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
assert(format_desc->block.width == 1);
return FALSE;
*width = format_desc->channel[z_swizzle].size;
+ /* & 31 is for the same reason as the 32-bit limit above */
+ *shift = format_desc->channel[z_swizzle].shift & 31;
- padding_right = 0;
- for (chan = 0; chan < z_swizzle; ++chan)
- padding_right += format_desc->channel[chan].size;
-
- padding_left =
- total_bits - (padding_right + *width);
-
- if (padding_left || padding_right) {
- unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
- unsigned long long mask_right = (1ULL << (padding_right)) - 1;
- *mask = mask_left ^ mask_right;
- }
- else {
+ if (*width == total_bits) {
*mask = 0xffffffff;
+ } else {
+ *mask = ((1 << *width) - 1) << *shift;
}
- *shift = padding_right;
-
return TRUE;
}
unsigned *shift, unsigned *mask)
{
unsigned s_swizzle;
- unsigned chan, sz;
+ unsigned sz;
s_swizzle = format_desc->swizzle[1];
/* just special case 64bit d/s format */
if (format_desc->block.bits > 32) {
+ /* XXX big-endian? */
assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
*shift = 0;
*mask = 0xff;
return TRUE;
}
- *shift = 0;
- for (chan = 0; chan < s_swizzle; chan++)
- *shift += format_desc->channel[chan].size;
-
+ *shift = format_desc->channel[s_swizzle].shift;
sz = format_desc->channel[s_swizzle].size;
*mask = (1U << sz) - 1U;
* Test the depth mask. Add the number of channel which has none zero mask
* into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
* The counter will add 4.
+ * TODO: could get that out of the fs loop.
*
* \param type holds element type of the mask vector.
* \param maskvalue is the depth test mask.
LLVMInt32TypeInContext(context), bits);
count = lp_build_intrinsic_unary(builder, popcntintr,
LLVMInt32TypeInContext(context), bits);
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
}
else if(util_cpu_caps.has_avx && type.length == 8) {
const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
LLVMInt32TypeInContext(context), bits);
count = lp_build_intrinsic_unary(builder, popcntintr,
LLVMInt32TypeInContext(context), bits);
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
}
else {
unsigned i;
}
count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
- if (type.length > 4) {
- count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
+ if (type.length > 8) {
+ count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
+ }
+ else if (type.length < 8) {
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
}
}
newcount = LLVMBuildLoad(builder, counter, "origcount");
*
* \param type the data type of the fragment depth/stencil values
* \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
* \param loop_counter the current loop iteration
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block
* \param depth_stride stride of the depth/stencil buffer
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
LLVMValueRef *z_fb,
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
- zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
- zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
- zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ if (is_1d) {
+ zs_dst2 = lp_build_undef(gallivm, zs_load_type);
+ }
+ else {
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+ zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ }
*z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
LLVMConstVector(shuffles, zs_type.length), "");
*
* \param type the data type of the fragment depth/stencil values
* \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
* \param mask the alive/dead pixel mask for the quad (vector)
* \param z_fb z values read from fb (with padding)
* \param s_fb s values read from fb (with padding)
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
struct lp_build_mask_context *mask,
LLVMValueRef z_fb,
LLVMValueRef s_fb,
}
LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
- LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+ if (!is_1d) {
+ LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+ }
}
/**
LLVMValueRef stencil_vals = NULL;
LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
- LLVMValueRef orig_mask = lp_build_mask_value(mask);
+ LLVMValueRef current_mask = lp_build_mask_value(mask);
LLVMValueRef front_facing = NULL;
boolean have_z, have_s;
s_bld.int_vec_type, "");
}
- /* convert scalar stencil refs into vectors */
- stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
- stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
-
s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
stencil_refs, stencil_vals,
front_facing);
/* apply stencil-fail operator */
{
- LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
stencil_refs, stencil_vals,
s_fail_mask, front_facing);
/* compare src Z to dst Z, returning 'pass' mask */
z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
+ /* mask off bits that failed stencil test */
+ if (s_pass_mask) {
+ current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ }
+
if (!stencil[0].enabled) {
/* We can potentially skip all remaining operations here, but only
* if stencil is disabled because we still need to update the stencil
if (do_branch) {
lp_build_mask_check(mask);
- do_branch = FALSE;
}
}
if (depth->writemask) {
- LLVMValueRef zselectmask;
+ LLVMValueRef z_pass_mask;
/* mask off bits that failed Z test */
- zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
-
- /* mask off bits that failed stencil test */
- if (s_pass_mask) {
- zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
- }
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
/* Mix the old and new Z buffer values.
* z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
*/
- z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
+ z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
}
if (stencil[0].enabled) {
LLVMValueRef z_fail_mask, z_pass_mask;
/* apply Z-fail operator */
- z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
+ z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
stencil_refs, stencil_vals,
z_fail_mask, front_facing);
/* apply Z-pass operator */
- z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
z_pass_mask, front_facing);
/* No depth test: apply Z-pass operator to stencil buffer values which
* passed the stencil test.
*/
- s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
+ s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
stencil_refs, stencil_vals,
s_pass_mask, front_facing);
if (depth->enabled && stencil[0].enabled)
lp_build_mask_update(mask, z_pass);
-
- if (do_branch)
- lp_build_mask_check(mask);
-
}