Merge remote-tracking branch 'public/master' into vulkan
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_blend_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46 #include "pipe/p_state.h"
47 #include "util/u_debug.h"
48 #include "util/u_format.h"
49
50 #include "gallivm/lp_bld_type.h"
51 #include "gallivm/lp_bld_const.h"
52 #include "gallivm/lp_bld_arit.h"
53 #include "gallivm/lp_bld_logic.h"
54 #include "gallivm/lp_bld_swizzle.h"
55 #include "gallivm/lp_bld_bitarit.h"
56 #include "gallivm/lp_bld_debug.h"
57
58 #include "lp_bld_blend.h"
59
60
61 /**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66 struct lp_build_blend_aos_context
67 {
68 struct lp_build_context base;
69
70 LLVMValueRef src;
71 LLVMValueRef src_alpha;
72 LLVMValueRef src1;
73 LLVMValueRef src1_alpha;
74 LLVMValueRef dst;
75 LLVMValueRef const_;
76 LLVMValueRef const_alpha;
77
78 LLVMValueRef inv_src;
79 LLVMValueRef inv_src_alpha;
80 LLVMValueRef inv_dst;
81 LLVMValueRef inv_const;
82 LLVMValueRef inv_const_alpha;
83 LLVMValueRef saturate;
84
85 LLVMValueRef rgb_src_factor;
86 LLVMValueRef alpha_src_factor;
87 LLVMValueRef rgb_dst_factor;
88 LLVMValueRef alpha_dst_factor;
89 };
90
91
92 static LLVMValueRef
93 lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
94 unsigned factor,
95 boolean alpha)
96 {
97 LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
98 LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1;
99 LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
100
101 switch (factor) {
102 case PIPE_BLENDFACTOR_ZERO:
103 return bld->base.zero;
104 case PIPE_BLENDFACTOR_ONE:
105 return bld->base.one;
106 case PIPE_BLENDFACTOR_SRC_COLOR:
107 return bld->src;
108 case PIPE_BLENDFACTOR_SRC_ALPHA:
109 return src_alpha;
110 case PIPE_BLENDFACTOR_DST_COLOR:
111 case PIPE_BLENDFACTOR_DST_ALPHA:
112 return bld->dst;
113 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
114 if(alpha)
115 return bld->base.one;
116 else {
117 /*
118 * if there's separate src_alpha there's no dst alpha hence the complement
119 * is zero but for unclamped float inputs min can be non-zero (negative).
120 */
121 if (bld->src_alpha) {
122 if (!bld->saturate)
123 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
124 }
125 else {
126 if(!bld->inv_dst)
127 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
128 if(!bld->saturate)
129 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
130 }
131 return bld->saturate;
132 }
133 case PIPE_BLENDFACTOR_CONST_COLOR:
134 return bld->const_;
135 case PIPE_BLENDFACTOR_CONST_ALPHA:
136 return const_alpha;
137 case PIPE_BLENDFACTOR_SRC1_COLOR:
138 return bld->src1;
139 case PIPE_BLENDFACTOR_SRC1_ALPHA:
140 return src1_alpha;
141 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
142 if(!bld->inv_src)
143 bld->inv_src = lp_build_comp(&bld->base, bld->src);
144 return bld->inv_src;
145 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
146 if(!bld->inv_src_alpha)
147 bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
148 return bld->inv_src_alpha;
149 case PIPE_BLENDFACTOR_INV_DST_COLOR:
150 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
151 if(!bld->inv_dst)
152 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
153 return bld->inv_dst;
154 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
155 if(!bld->inv_const)
156 bld->inv_const = lp_build_comp(&bld->base, bld->const_);
157 return bld->inv_const;
158 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
159 if(!bld->inv_const_alpha)
160 bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
161 return bld->inv_const_alpha;
162 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
163 return lp_build_comp(&bld->base, bld->src1);
164 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
165 return lp_build_comp(&bld->base, src1_alpha);
166 default:
167 assert(0);
168 return bld->base.zero;
169 }
170 }
171
172
/**
 * How an unswizzled blend factor has to be rearranged before use.
 */
enum lp_build_blend_swizzle {
   /* Use the factor's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA,
   /* Use the factor's alpha channel for every channel. */
   LP_BUILD_BLEND_SWIZZLE_AAAA
};
177
178
179 /**
180 * How should we shuffle the base factor.
181 */
182 static enum lp_build_blend_swizzle
183 lp_build_blend_factor_swizzle(unsigned factor)
184 {
185 switch (factor) {
186 case PIPE_BLENDFACTOR_ONE:
187 case PIPE_BLENDFACTOR_ZERO:
188 case PIPE_BLENDFACTOR_SRC_COLOR:
189 case PIPE_BLENDFACTOR_DST_COLOR:
190 case PIPE_BLENDFACTOR_CONST_COLOR:
191 case PIPE_BLENDFACTOR_SRC1_COLOR:
192 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
193 case PIPE_BLENDFACTOR_INV_DST_COLOR:
194 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
195 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
196 return LP_BUILD_BLEND_SWIZZLE_RGBA;
197 case PIPE_BLENDFACTOR_SRC_ALPHA:
198 case PIPE_BLENDFACTOR_DST_ALPHA:
199 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
200 case PIPE_BLENDFACTOR_SRC1_ALPHA:
201 case PIPE_BLENDFACTOR_CONST_ALPHA:
202 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
203 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
204 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
205 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
206 return LP_BUILD_BLEND_SWIZZLE_AAAA;
207 default:
208 assert(0);
209 return LP_BUILD_BLEND_SWIZZLE_RGBA;
210 }
211 }
212
213
214 static LLVMValueRef
215 lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
216 LLVMValueRef rgb,
217 LLVMValueRef alpha,
218 enum lp_build_blend_swizzle rgb_swizzle,
219 unsigned alpha_swizzle,
220 unsigned num_channels)
221 {
222 LLVMValueRef swizzled_rgb;
223
224 switch (rgb_swizzle) {
225 case LP_BUILD_BLEND_SWIZZLE_RGBA:
226 swizzled_rgb = rgb;
227 break;
228 case LP_BUILD_BLEND_SWIZZLE_AAAA:
229 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
230 break;
231 default:
232 assert(0);
233 swizzled_rgb = bld->base.undef;
234 }
235
236 if (rgb != alpha) {
237 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
238 alpha, swizzled_rgb,
239 num_channels);
240 }
241
242 return swizzled_rgb;
243 }
244
245 /**
246 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
247 */
248 static LLVMValueRef
249 lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
250 unsigned rgb_factor,
251 unsigned alpha_factor,
252 unsigned alpha_swizzle,
253 unsigned num_channels)
254 {
255 LLVMValueRef rgb_factor_, alpha_factor_;
256 enum lp_build_blend_swizzle rgb_swizzle;
257
258 if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) {
259 return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
260 }
261
262 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
263
264 if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
265 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
266 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
267 return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
268 } else {
269 return rgb_factor_;
270 }
271 }
272
273
274 /**
275 * Performs blending of src and dst pixels
276 *
277 * @param blend the blend state of the shader variant
278 * @param cbuf_format format of the colour buffer
279 * @param type data type of the pixel vector
280 * @param rt render target index
281 * @param src blend src
282 * @param src_alpha blend src alpha (if not included in src)
283 * @param src1 second blend src (for dual source blend)
284 * @param src1_alpha second blend src alpha (if not included in src1)
285 * @param dst blend dst
286 * @param mask optional mask to apply to the blending result
287 * @param const_ const blend color
288 * @param const_alpha const blend color alpha (if not included in const_)
289 * @param swizzle swizzle values for RGBA
290 *
291 * @return the result of blending src and dst
292 */
293 LLVMValueRef
294 lp_build_blend_aos(struct gallivm_state *gallivm,
295 const struct pipe_blend_state *blend,
296 enum pipe_format cbuf_format,
297 struct lp_type type,
298 unsigned rt,
299 LLVMValueRef src,
300 LLVMValueRef src_alpha,
301 LLVMValueRef src1,
302 LLVMValueRef src1_alpha,
303 LLVMValueRef dst,
304 LLVMValueRef mask,
305 LLVMValueRef const_,
306 LLVMValueRef const_alpha,
307 const unsigned char swizzle[4],
308 int nr_channels)
309 {
310 const struct pipe_rt_blend_state * state = &blend->rt[rt];
311 const struct util_format_description * desc;
312 struct lp_build_blend_aos_context bld;
313 LLVMValueRef src_factor, dst_factor;
314 LLVMValueRef result;
315 unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
316 unsigned i;
317
318 desc = util_format_description(cbuf_format);
319
320 /* Setup build context */
321 memset(&bld, 0, sizeof bld);
322 lp_build_context_init(&bld.base, gallivm, type);
323 bld.src = src;
324 bld.src1 = src1;
325 bld.dst = dst;
326 bld.const_ = const_;
327 bld.src_alpha = src_alpha;
328 bld.src1_alpha = src1_alpha;
329 bld.const_alpha = const_alpha;
330
331 /* Find the alpha channel if not provided seperately */
332 if (!src_alpha) {
333 for (i = 0; i < 4; ++i) {
334 if (swizzle[i] == 3) {
335 alpha_swizzle = i;
336 }
337 }
338 }
339
340 if (blend->logicop_enable) {
341 if(!type.floating) {
342 result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
343 }
344 else {
345 result = src;
346 }
347 } else if (!state->blend_enable) {
348 result = src;
349 } else {
350 boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
351
352 src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
353 state->alpha_src_factor,
354 alpha_swizzle,
355 nr_channels);
356
357 dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
358 state->alpha_dst_factor,
359 alpha_swizzle,
360 nr_channels);
361
362 result = lp_build_blend(&bld.base,
363 state->rgb_func,
364 state->rgb_src_factor,
365 state->rgb_dst_factor,
366 src,
367 dst,
368 src_factor,
369 dst_factor,
370 rgb_alpha_same,
371 false);
372
373 if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
374 LLVMValueRef alpha;
375
376 alpha = lp_build_blend(&bld.base,
377 state->alpha_func,
378 state->alpha_src_factor,
379 state->alpha_dst_factor,
380 src,
381 dst,
382 src_factor,
383 dst_factor,
384 rgb_alpha_same,
385 false);
386
387 result = lp_build_blend_swizzle(&bld,
388 result,
389 alpha,
390 LP_BUILD_BLEND_SWIZZLE_RGBA,
391 alpha_swizzle,
392 nr_channels);
393 }
394 }
395
396 /* Check if color mask is necessary */
397 if (!util_format_colormask_full(desc, state->colormask)) {
398 LLVMValueRef color_mask;
399
400 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
401 lp_build_name(color_mask, "color_mask");
402
403 /* Combine with input mask if necessary */
404 if (mask) {
405 /* We can be blending floating values but masks are always integer... */
406 unsigned floating = bld.base.type.floating;
407 bld.base.type.floating = 0;
408
409 mask = lp_build_and(&bld.base, color_mask, mask);
410
411 bld.base.type.floating = floating;
412 } else {
413 mask = color_mask;
414 }
415 }
416
417 /* Apply mask, if one exists */
418 if (mask) {
419 result = lp_build_select(&bld.base, mask, result, dst);
420 }
421
422 return result;
423 }