gallivm/llvmpipe: basic stencil testing works
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_depth.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only converting when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 * Z11 Z12 Z13 Z14 ...
44 * Z21 Z22 Z23 Z24 ...
45 * Z31 Z32 Z33 Z34 ...
46 * Z41 Z42 Z43 Z44 ...
47 * ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 * ... ... ... ... ... ... ... ... ...
54 *
55 * FIXME: Code generate stencil test
56 *
57 * @author Jose Fonseca <jfonseca@vmware.com>
58 */
59
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62
63 #include "lp_bld_type.h"
64 #include "lp_bld_arit.h"
65 #include "lp_bld_const.h"
66 #include "lp_bld_logic.h"
67 #include "lp_bld_flow.h"
68 #include "lp_bld_debug.h"
69 #include "lp_bld_depth.h"
70 #include "lp_bld_swizzle.h"
71
72
73
74 /**
75 * Do the stencil test comparison (compare fb Z values against ref value.
76 * \param stencilVals vector of stencil values from framebuffer
77 * \param stencilRef the stencil reference value, replicated as a vector
78 * \return mask of pass/fail values
79 */
80 static LLVMValueRef
81 lp_build_stencil_test(struct lp_build_context *bld,
82 const struct pipe_stencil_state *stencil,
83 LLVMValueRef stencilRef,
84 LLVMValueRef stencilVals)
85 {
86 const unsigned stencilMax = 255; /* XXX fix */
87 struct lp_type type = bld->type;
88 LLVMValueRef res;
89
90 assert(stencil->enabled);
91
92 if (stencil->valuemask != stencilMax) {
93 /* compute stencilRef = stencilRef & valuemask */
94 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
95 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
96 /* compute stencilVals = stencilVals & valuemask */
97 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
98 }
99
100 res = lp_build_cmp(bld, stencil->func, stencilVals, stencilRef);
101
102 return res;
103 }
104
105
106 /**
107 * Apply the stencil operator (add/sub/keep/etc) to the given vector
108 * of stencil values.
109 * \return new stencil values vector
110 */
111 static LLVMValueRef
112 lp_build_stencil_op(struct lp_build_context *bld,
113 const struct pipe_stencil_state *stencil,
114 unsigned stencil_op,
115 LLVMValueRef stencilRef,
116 LLVMValueRef stencilVals,
117 LLVMValueRef mask)
118
119 {
120 const unsigned stencilMax = 255; /* XXX fix */
121 struct lp_type type = bld->type;
122 LLVMValueRef res;
123 LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
124
125 switch (stencil_op) {
126 case PIPE_STENCIL_OP_KEEP:
127 res = stencilVals;
128 /* we can return early for this case */
129 return res;
130 case PIPE_STENCIL_OP_ZERO:
131 res = bld->zero;
132 break;
133 case PIPE_STENCIL_OP_REPLACE:
134 res = stencilRef;
135 break;
136 case PIPE_STENCIL_OP_INCR:
137 res = lp_build_add(bld, stencilVals, bld->one);
138 res = lp_build_min(bld, res, max);
139 break;
140 case PIPE_STENCIL_OP_DECR:
141 res = lp_build_sub(bld, stencilVals, bld->one);
142 res = lp_build_max(bld, res, bld->zero);
143 break;
144 case PIPE_STENCIL_OP_INCR_WRAP:
145 res = lp_build_add(bld, stencilVals, bld->one);
146 res = LLVMBuildAnd(bld->builder, res, max, "");
147 break;
148 case PIPE_STENCIL_OP_DECR_WRAP:
149 res = lp_build_sub(bld, stencilVals, bld->one);
150 res = LLVMBuildAnd(bld->builder, res, max, "");
151 break;
152 case PIPE_STENCIL_OP_INVERT:
153 res = LLVMBuildNot(bld->builder, stencilVals, "");
154 break;
155 default:
156 assert(0 && "bad stencil op mode");
157 res = NULL;
158 }
159
160 if (stencil->writemask != stencilMax) {
161 /* compute res = (res & mask) | (stencilVals & ~mask) */
162 LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask);
163 LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask");
164 LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1");
165 LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2");
166 res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2");
167 }
168
169 /* only the update the vector elements enabled by 'mask' */
170 res = lp_build_select(bld, mask, res, stencilVals);
171
172 return res;
173 }
174
175
176 /**
177 * Return a type appropriate for depth/stencil testing.
178 */
179 struct lp_type
180 lp_depth_type(const struct util_format_description *format_desc,
181 unsigned length)
182 {
183 struct lp_type type;
184 unsigned swizzle;
185
186 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
187 assert(format_desc->block.width == 1);
188 assert(format_desc->block.height == 1);
189
190 swizzle = format_desc->swizzle[0];
191 assert(swizzle < 4);
192
193 memset(&type, 0, sizeof type);
194 type.width = format_desc->block.bits;
195
196 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
197 type.floating = TRUE;
198 assert(swizzle == 0);
199 assert(format_desc->channel[swizzle].size == format_desc->block.bits);
200 }
201 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
202 assert(format_desc->block.bits <= 32);
203 if(format_desc->channel[swizzle].normalized)
204 type.norm = TRUE;
205 }
206 else
207 assert(0);
208
209 assert(type.width <= length);
210 type.length = length / type.width;
211
212 return type;
213 }
214
215
216 static LLVMValueRef
217 lp_build_get_stencil_ref(struct lp_build_context *bld,
218 struct lp_type type, LLVMValueRef stencil_refs_ptr)
219 {
220 LLVMValueRef indexes[2], ptr, ref, ref_vec;
221
222 /* load 0th element of the array */
223 indexes[0] = indexes[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
224 ptr = LLVMBuildGEP(bld->builder, stencil_refs_ptr, indexes, 2, "");
225 ref = LLVMBuildLoad(bld->builder, ptr, "");
226
227 /* convert int8 value to i32 */
228 ref = LLVMBuildZExt(bld->builder, ref, LLVMIntType(type.width), "");
229
230 /* make scalar into vector */
231 ref_vec = lp_build_broadcast_scalar(bld, ref);
232
233 return ref_vec;
234 }
235
236
237 /**
238 * Generate code for performing depth and/or stencil tests.
239 * We operate on a vector of values (typically a 2x2 quad).
240 *
241 * \param type the data type of the fragment depth/stencil values
242 * \param format_desc description of the depth/stencil surface
243 * \param mask the alive/dead pixel mask for the quad
244 * \param src the incoming depth/stencil values (a 2x2 quad)
245 * \param dst_ptr the outgoing/updated depth/stencil values
246 */
247 void
248 lp_build_depth_stencil_test(LLVMBuilderRef builder,
249 const struct pipe_depth_state *depth,
250 const struct pipe_stencil_state stencil[2],
251 struct lp_type type,
252 const struct util_format_description *format_desc,
253 struct lp_build_mask_context *mask,
254 LLVMValueRef stencil_refs,
255 LLVMValueRef z_src,
256 LLVMValueRef zs_dst_ptr)
257 {
258 struct lp_build_context bld;
259 unsigned z_swizzle, s_swizzle;
260 LLVMValueRef zs_dst, z_dst = NULL;
261 LLVMValueRef stencil_vals = NULL;
262 LLVMValueRef z_bitmask = NULL, s_bitmask = NULL;
263 LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
264 LLVMValueRef orig_mask = mask->value;
265
266 assert(depth->enabled || stencil[0].enabled);
267
268 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
269 assert(format_desc->block.width == 1);
270 assert(format_desc->block.height == 1);
271
272 z_swizzle = format_desc->swizzle[0];
273 s_swizzle = format_desc->swizzle[1];
274
275 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
276 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
277
278 /* Sanity checking */
279 assert(z_swizzle < 4);
280 assert(format_desc->block.bits == type.width);
281 if(type.floating) {
282 assert(z_swizzle == 0);
283 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT);
284 assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
285 }
286 else {
287 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
288 assert(format_desc->channel[z_swizzle].normalized);
289 assert(!type.fixed);
290 assert(!type.sign);
291 assert(type.norm);
292 }
293
294 /* Setup build context */
295 lp_build_context_init(&bld, builder, type);
296
297 /* Load current z/stencil value from z/stencil buffer */
298 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
299
300 lp_build_name(zs_dst, "zsbufval");
301
302 /* Align the source depth bits with the destination's, and mask out any
303 * stencil or padding bits from both */
304 if(format_desc->channel[z_swizzle].size == format_desc->block.bits) {
305 assert(z_swizzle == 0);
306 z_dst = zs_dst;
307 }
308 else {
309 /* shift/mask bits to right-justify the Z bits */
310 unsigned padding_left;
311 unsigned padding_right;
312 unsigned chan;
313
314 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
315 assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
316 assert(format_desc->channel[z_swizzle].size <= format_desc->block.bits);
317 assert(format_desc->channel[z_swizzle].normalized);
318
319 padding_right = 0;
320 for(chan = 0; chan < z_swizzle; ++chan)
321 padding_right += format_desc->channel[chan].size;
322 padding_left = format_desc->block.bits -
323 (padding_right + format_desc->channel[z_swizzle].size);
324
325 if(padding_left || padding_right) {
326 const unsigned long long mask_left = (1ULL << (format_desc->block.bits - padding_left)) - 1;
327 const unsigned long long mask_right = (1ULL << (padding_right)) - 1;
328 z_bitmask = lp_build_const_int_vec(type, mask_left ^ mask_right);
329 }
330
331 s_bitmask = LLVMBuildNot(builder, z_bitmask, "");
332
333 stencil_vals = LLVMBuildAnd(builder, zs_dst, s_bitmask, "");
334
335 if(padding_left)
336 z_src = LLVMBuildLShr(builder, z_src,
337 lp_build_const_int_vec(type, padding_left), "");
338 if(padding_right)
339 z_src = LLVMBuildAnd(builder, z_src, z_bitmask, "");
340 if(padding_left || padding_right)
341 z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "");
342 else
343 z_dst = zs_dst;
344 }
345
346 lp_build_name(z_dst, "zsbuf.z");
347
348 /*
349 printf("build depth %d stencil %d\n",
350 depth->enabled,
351 stencil[0].enabled);
352 */
353
354 if (stencil[0].enabled) {
355 /* Incoming stencil_refs is ptr to int8[2]. Get/convert to int32[4]. */
356 stencil_refs = lp_build_get_stencil_ref(&bld, type, stencil_refs);
357
358 s_pass_mask = lp_build_stencil_test(&bld, stencil,
359 stencil_refs, stencil_vals);
360
361 /* apply stencil-fail operator */
362 {
363 LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
364 stencil_vals = lp_build_stencil_op(&bld, stencil, stencil[0].fail_op,
365 stencil_refs, stencil_vals,
366 s_fail_mask);
367 }
368 }
369
370 if (depth->enabled) {
371 /* compare src Z to dst Z, returning 'pass' mask */
372 z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
373
374 if (!stencil[0].enabled) {
375 /* We can potentially skip all remaining operations here, but only
376 * if stencil is disabled because we still need to update the stencil
377 * buffer values. Don't need to update Z buffer values.
378 */
379 lp_build_mask_update(mask, z_pass);
380 }
381
382 if (depth->writemask) {
383 if(z_bitmask)
384 z_bitmask = LLVMBuildAnd(builder, mask->value, z_bitmask, "");
385 else
386 z_bitmask = mask->value;
387
388 z_dst = lp_build_select(&bld, z_bitmask, z_src, z_dst);
389 }
390
391 if (stencil[0].enabled) {
392 /* update stencil buffer values according to z pass/fail result */
393 LLVMValueRef z_fail_mask, z_pass_mask;
394
395 /* apply Z-fail operator */
396 z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
397 stencil_vals = lp_build_stencil_op(&bld, stencil, stencil[0].zfail_op,
398 stencil_refs, stencil_vals,
399 z_fail_mask);
400
401 /* apply Z-pass operator */
402 z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
403 stencil_vals = lp_build_stencil_op(&bld, stencil, stencil[0].zpass_op,
404 stencil_refs, stencil_vals,
405 z_pass_mask);
406 }
407 }
408 else {
409 /* No depth test: apply Z-pass operator to stencil buffer values which
410 * passed the stencil test.
411 */
412 s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
413 stencil_vals = lp_build_stencil_op(&bld, stencil, stencil[0].zpass_op,
414 stencil_refs, stencil_vals, s_pass_mask);
415 }
416
417 /* Finally, merge/store the z/stencil values */
418 if ((depth->enabled && depth->writemask) ||
419 (stencil[0].enabled && stencil[0].writemask)) {
420
421 if (z_dst && stencil_vals)
422 zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
423 else if (z_dst)
424 zs_dst = z_dst;
425 else
426 zs_dst = stencil_vals;
427
428 LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
429 }
430
431 if (s_pass_mask)
432 lp_build_mask_update(mask, s_pass_mask);
433
434 if (depth->enabled && stencil[0].enabled)
435 lp_build_mask_update(mask, z_pass);
436 }