llvmpipe: Temporary workaround to prevent segfault on array textures.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_blend_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46 #include "pipe/p_state.h"
47 #include "util/u_debug.h"
48 #include "util/u_format.h"
49
50 #include "gallivm/lp_bld_type.h"
51 #include "gallivm/lp_bld_const.h"
52 #include "gallivm/lp_bld_arit.h"
53 #include "gallivm/lp_bld_logic.h"
54 #include "gallivm/lp_bld_swizzle.h"
55 #include "gallivm/lp_bld_bitarit.h"
56 #include "gallivm/lp_bld_debug.h"
57
58 #include "lp_bld_blend.h"
59
60
61 /**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66 struct lp_build_blend_aos_context
67 {
68 struct lp_build_context base;
69
70 LLVMValueRef src;
71 LLVMValueRef src_alpha;
72 LLVMValueRef src1;
73 LLVMValueRef src1_alpha;
74 LLVMValueRef dst;
75 LLVMValueRef const_;
76 LLVMValueRef const_alpha;
77
78 LLVMValueRef inv_src;
79 LLVMValueRef inv_src_alpha;
80 LLVMValueRef inv_dst;
81 LLVMValueRef inv_const;
82 LLVMValueRef inv_const_alpha;
83 LLVMValueRef saturate;
84
85 LLVMValueRef rgb_src_factor;
86 LLVMValueRef alpha_src_factor;
87 LLVMValueRef rgb_dst_factor;
88 LLVMValueRef alpha_dst_factor;
89 };
90
91
92 static LLVMValueRef
93 lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
94 unsigned factor,
95 boolean alpha)
96 {
97 LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
98 LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1;
99 LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
100
101 switch (factor) {
102 case PIPE_BLENDFACTOR_ZERO:
103 return bld->base.zero;
104 case PIPE_BLENDFACTOR_ONE:
105 return bld->base.one;
106 case PIPE_BLENDFACTOR_SRC_COLOR:
107 return bld->src;
108 case PIPE_BLENDFACTOR_SRC_ALPHA:
109 return src_alpha;
110 case PIPE_BLENDFACTOR_DST_COLOR:
111 case PIPE_BLENDFACTOR_DST_ALPHA:
112 return bld->dst;
113 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
114 if(alpha)
115 return bld->base.one;
116 else {
117 if(!bld->inv_dst)
118 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
119 if(!bld->saturate)
120 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
121 return bld->saturate;
122 }
123 case PIPE_BLENDFACTOR_CONST_COLOR:
124 return bld->const_;
125 case PIPE_BLENDFACTOR_CONST_ALPHA:
126 return const_alpha;
127 case PIPE_BLENDFACTOR_SRC1_COLOR:
128 return bld->src1;
129 case PIPE_BLENDFACTOR_SRC1_ALPHA:
130 return src1_alpha;
131 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
132 if(!bld->inv_src)
133 bld->inv_src = lp_build_comp(&bld->base, bld->src);
134 return bld->inv_src;
135 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
136 if(!bld->inv_src_alpha)
137 bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
138 return bld->inv_src_alpha;
139 case PIPE_BLENDFACTOR_INV_DST_COLOR:
140 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
141 if(!bld->inv_dst)
142 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
143 return bld->inv_dst;
144 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
145 if(!bld->inv_const)
146 bld->inv_const = lp_build_comp(&bld->base, bld->const_);
147 return bld->inv_const;
148 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
149 if(!bld->inv_const_alpha)
150 bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
151 return bld->inv_const_alpha;
152 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
153 return lp_build_comp(&bld->base, bld->src1);
154 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
155 return lp_build_comp(&bld->base, src1_alpha);
156 default:
157 assert(0);
158 return bld->base.zero;
159 }
160 }
161
162
/**
 * How the vector returned for a blend factor has to be shuffled before use.
 */
enum lp_build_blend_swizzle {
   /* Use the channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /* The factor is an alpha value: use the alpha channel for every channel. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
167
168
169 /**
170 * How should we shuffle the base factor.
171 */
172 static enum lp_build_blend_swizzle
173 lp_build_blend_factor_swizzle(unsigned factor)
174 {
175 switch (factor) {
176 case PIPE_BLENDFACTOR_ONE:
177 case PIPE_BLENDFACTOR_ZERO:
178 case PIPE_BLENDFACTOR_SRC_COLOR:
179 case PIPE_BLENDFACTOR_DST_COLOR:
180 case PIPE_BLENDFACTOR_CONST_COLOR:
181 case PIPE_BLENDFACTOR_SRC1_COLOR:
182 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
183 case PIPE_BLENDFACTOR_INV_DST_COLOR:
184 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
185 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
186 return LP_BUILD_BLEND_SWIZZLE_RGBA;
187 case PIPE_BLENDFACTOR_SRC_ALPHA:
188 case PIPE_BLENDFACTOR_DST_ALPHA:
189 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
190 case PIPE_BLENDFACTOR_SRC1_ALPHA:
191 case PIPE_BLENDFACTOR_CONST_ALPHA:
192 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
193 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
194 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
195 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
196 return LP_BUILD_BLEND_SWIZZLE_AAAA;
197 default:
198 assert(0);
199 return LP_BUILD_BLEND_SWIZZLE_RGBA;
200 }
201 }
202
203
204 static LLVMValueRef
205 lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
206 LLVMValueRef rgb,
207 LLVMValueRef alpha,
208 enum lp_build_blend_swizzle rgb_swizzle,
209 unsigned alpha_swizzle,
210 unsigned num_channels)
211 {
212 LLVMValueRef swizzled_rgb;
213
214 switch (rgb_swizzle) {
215 case LP_BUILD_BLEND_SWIZZLE_RGBA:
216 swizzled_rgb = rgb;
217 break;
218 case LP_BUILD_BLEND_SWIZZLE_AAAA:
219 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
220 break;
221 default:
222 assert(0);
223 swizzled_rgb = bld->base.undef;
224 }
225
226 if (rgb != alpha) {
227 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
228 alpha, swizzled_rgb,
229 num_channels);
230 }
231
232 return swizzled_rgb;
233 }
234
235 /**
236 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
237 */
238 static LLVMValueRef
239 lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
240 unsigned rgb_factor,
241 unsigned alpha_factor,
242 unsigned alpha_swizzle,
243 unsigned num_channels)
244 {
245 LLVMValueRef rgb_factor_, alpha_factor_;
246 enum lp_build_blend_swizzle rgb_swizzle;
247
248 if (alpha_swizzle == 0) {
249 return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
250 }
251
252 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
253
254 if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
255 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
256 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
257 return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
258 } else {
259 return rgb_factor_;
260 }
261 }
262
263
264 /**
265 * Performs blending of src and dst pixels
266 *
267 * @param blend the blend state of the shader variant
268 * @param cbuf_format format of the colour buffer
269 * @param type data type of the pixel vector
270 * @param rt render target index
271 * @param src blend src
272 * @param src_alpha blend src alpha (if not included in src)
273 * @param src1 second blend src (for dual source blend)
274 * @param src1_alpha second blend src alpha (if not included in src1)
275 * @param dst blend dst
276 * @param mask optional mask to apply to the blending result
277 * @param const_ const blend color
278 * @param const_alpha const blend color alpha (if not included in const_)
279 * @param swizzle swizzle values for RGBA
280 *
281 * @return the result of blending src and dst
282 */
283 LLVMValueRef
284 lp_build_blend_aos(struct gallivm_state *gallivm,
285 const struct pipe_blend_state *blend,
286 enum pipe_format cbuf_format,
287 struct lp_type type,
288 unsigned rt,
289 LLVMValueRef src,
290 LLVMValueRef src_alpha,
291 LLVMValueRef src1,
292 LLVMValueRef src1_alpha,
293 LLVMValueRef dst,
294 LLVMValueRef mask,
295 LLVMValueRef const_,
296 LLVMValueRef const_alpha,
297 const unsigned char swizzle[4],
298 int nr_channels)
299 {
300 const struct pipe_rt_blend_state * state = &blend->rt[rt];
301 const struct util_format_description * desc;
302 struct lp_build_blend_aos_context bld;
303 LLVMValueRef src_factor, dst_factor;
304 LLVMValueRef result;
305 unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
306 unsigned i;
307
308 desc = util_format_description(cbuf_format);
309
310 /* Setup build context */
311 memset(&bld, 0, sizeof bld);
312 lp_build_context_init(&bld.base, gallivm, type);
313 bld.src = src;
314 bld.src1 = src1;
315 bld.dst = dst;
316 bld.const_ = const_;
317 bld.src_alpha = src_alpha;
318 bld.src1_alpha = src1_alpha;
319 bld.const_alpha = const_alpha;
320
321 /* Find the alpha channel if not provided seperately */
322 if (!src_alpha) {
323 for (i = 0; i < 4; ++i) {
324 if (swizzle[i] == 3) {
325 alpha_swizzle = i;
326 }
327 }
328 }
329
330 if (blend->logicop_enable) {
331 if(!type.floating) {
332 result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
333 }
334 else {
335 result = src;
336 }
337 } else if (!state->blend_enable) {
338 result = src;
339 } else {
340 boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
341
342 src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
343 state->alpha_src_factor,
344 alpha_swizzle,
345 nr_channels);
346
347 dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
348 state->alpha_dst_factor,
349 alpha_swizzle,
350 nr_channels);
351
352 result = lp_build_blend(&bld.base,
353 state->rgb_func,
354 state->rgb_src_factor,
355 state->rgb_dst_factor,
356 src,
357 dst,
358 src_factor,
359 dst_factor,
360 rgb_alpha_same,
361 false);
362
363 if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
364 LLVMValueRef alpha;
365
366 alpha = lp_build_blend(&bld.base,
367 state->alpha_func,
368 state->alpha_src_factor,
369 state->alpha_dst_factor,
370 src,
371 dst,
372 src_factor,
373 dst_factor,
374 rgb_alpha_same,
375 false);
376
377 result = lp_build_blend_swizzle(&bld,
378 result,
379 alpha,
380 LP_BUILD_BLEND_SWIZZLE_RGBA,
381 alpha_swizzle,
382 nr_channels);
383 }
384 }
385
386 /* Check if color mask is necessary */
387 if (!util_format_colormask_full(desc, state->colormask)) {
388 LLVMValueRef color_mask;
389
390 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
391 lp_build_name(color_mask, "color_mask");
392
393 /* Combine with input mask if necessary */
394 if (mask) {
395 /* We can be blending floating values but masks are always integer... */
396 unsigned floating = bld.base.type.floating;
397 bld.base.type.floating = 0;
398
399 mask = lp_build_and(&bld.base, color_mask, mask);
400
401 bld.base.type.floating = floating;
402 } else {
403 mask = color_mask;
404 }
405 }
406
407 /* Apply mask, if one exists */
408 if (mask) {
409 result = lp_build_select(&bld.base, mask, result, dst);
410 }
411
412 return result;
413 }