llvmpipe: fix using wrong format with MRT in blend code
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_blend_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46 #include "pipe/p_state.h"
47 #include "util/u_debug.h"
48 #include "util/u_format.h"
49
50 #include "gallivm/lp_bld_type.h"
51 #include "gallivm/lp_bld_const.h"
52 #include "gallivm/lp_bld_arit.h"
53 #include "gallivm/lp_bld_logic.h"
54 #include "gallivm/lp_bld_swizzle.h"
55 #include "gallivm/lp_bld_bitarit.h"
56 #include "gallivm/lp_bld_debug.h"
57
58 #include "lp_bld_blend.h"
59
60
61 /**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66 struct lp_build_blend_aos_context
67 {
68 struct lp_build_context base;
69
70 LLVMValueRef src;
71 LLVMValueRef src_alpha;
72 LLVMValueRef dst;
73 LLVMValueRef const_;
74 LLVMValueRef const_alpha;
75
76 LLVMValueRef inv_src;
77 LLVMValueRef inv_src_alpha;
78 LLVMValueRef inv_dst;
79 LLVMValueRef inv_const;
80 LLVMValueRef inv_const_alpha;
81 LLVMValueRef saturate;
82
83 LLVMValueRef rgb_src_factor;
84 LLVMValueRef alpha_src_factor;
85 LLVMValueRef rgb_dst_factor;
86 LLVMValueRef alpha_dst_factor;
87 };
88
89
90 static LLVMValueRef
91 lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
92 unsigned factor,
93 boolean alpha)
94 {
95 LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
96 LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
97
98 switch (factor) {
99 case PIPE_BLENDFACTOR_ZERO:
100 return bld->base.zero;
101 case PIPE_BLENDFACTOR_ONE:
102 return bld->base.one;
103 case PIPE_BLENDFACTOR_SRC_COLOR:
104 return bld->src;
105 case PIPE_BLENDFACTOR_SRC_ALPHA:
106 return src_alpha;
107 case PIPE_BLENDFACTOR_DST_COLOR:
108 case PIPE_BLENDFACTOR_DST_ALPHA:
109 return bld->dst;
110 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
111 if(alpha)
112 return bld->base.one;
113 else {
114 if(!bld->inv_dst)
115 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
116 if(!bld->saturate)
117 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
118 return bld->saturate;
119 }
120 case PIPE_BLENDFACTOR_CONST_COLOR:
121 return bld->const_;
122 case PIPE_BLENDFACTOR_CONST_ALPHA:
123 return const_alpha;
124 case PIPE_BLENDFACTOR_SRC1_COLOR:
125 case PIPE_BLENDFACTOR_SRC1_ALPHA:
126 /* TODO */
127 assert(0);
128 return bld->base.zero;
129 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
130 if(!bld->inv_src)
131 bld->inv_src = lp_build_comp(&bld->base, bld->src);
132 return bld->inv_src;
133 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
134 if(!bld->inv_src_alpha)
135 bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
136 return bld->inv_src_alpha;
137 case PIPE_BLENDFACTOR_INV_DST_COLOR:
138 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
139 if(!bld->inv_dst)
140 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
141 return bld->inv_dst;
142 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
143 if(!bld->inv_const)
144 bld->inv_const = lp_build_comp(&bld->base, bld->const_);
145 return bld->inv_const;
146 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
147 if(!bld->inv_const_alpha)
148 bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
149 return bld->inv_const_alpha;
150 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
151 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
152 /* TODO */
153 assert(0);
154 return bld->base.zero;
155 default:
156 assert(0);
157 return bld->base.zero;
158 }
159 }
160
161
/**
 * How the channels of a blend factor vector are to be arranged.
 */
enum lp_build_blend_swizzle {
   /* Use the vector's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,

   /* Replicate the alpha channel across the vector. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
166
167
168 /**
169 * How should we shuffle the base factor.
170 */
171 static enum lp_build_blend_swizzle
172 lp_build_blend_factor_swizzle(unsigned factor)
173 {
174 switch (factor) {
175 case PIPE_BLENDFACTOR_ONE:
176 case PIPE_BLENDFACTOR_ZERO:
177 case PIPE_BLENDFACTOR_SRC_COLOR:
178 case PIPE_BLENDFACTOR_DST_COLOR:
179 case PIPE_BLENDFACTOR_CONST_COLOR:
180 case PIPE_BLENDFACTOR_SRC1_COLOR:
181 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
182 case PIPE_BLENDFACTOR_INV_DST_COLOR:
183 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
184 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
185 return LP_BUILD_BLEND_SWIZZLE_RGBA;
186 case PIPE_BLENDFACTOR_SRC_ALPHA:
187 case PIPE_BLENDFACTOR_DST_ALPHA:
188 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
189 case PIPE_BLENDFACTOR_SRC1_ALPHA:
190 case PIPE_BLENDFACTOR_CONST_ALPHA:
191 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
192 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
193 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
194 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
195 return LP_BUILD_BLEND_SWIZZLE_AAAA;
196 default:
197 assert(0);
198 return LP_BUILD_BLEND_SWIZZLE_RGBA;
199 }
200 }
201
202
203 static LLVMValueRef
204 lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
205 LLVMValueRef rgb,
206 LLVMValueRef alpha,
207 enum lp_build_blend_swizzle rgb_swizzle,
208 unsigned alpha_swizzle,
209 unsigned num_channels)
210 {
211 LLVMValueRef swizzled_rgb;
212
213 switch (rgb_swizzle) {
214 case LP_BUILD_BLEND_SWIZZLE_RGBA:
215 swizzled_rgb = rgb;
216 break;
217 case LP_BUILD_BLEND_SWIZZLE_AAAA:
218 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
219 break;
220 default:
221 assert(0);
222 swizzled_rgb = bld->base.undef;
223 }
224
225 if (rgb != alpha) {
226 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
227 alpha, swizzled_rgb,
228 num_channels);
229 }
230
231 return swizzled_rgb;
232 }
233
234 /**
235 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
236 */
237 static LLVMValueRef
238 lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
239 unsigned rgb_factor,
240 unsigned alpha_factor,
241 unsigned alpha_swizzle,
242 unsigned num_channels)
243 {
244 LLVMValueRef rgb_factor_, alpha_factor_;
245 enum lp_build_blend_swizzle rgb_swizzle;
246
247 if (alpha_swizzle == 0) {
248 return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
249 }
250
251 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
252
253 if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
254 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
255 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
256 return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
257 } else {
258 return rgb_factor_;
259 }
260 }
261
262
263 /**
264 * Performs blending of src and dst pixels
265 *
266 * @param blend the blend state of the shader variant
267 * @param cbuf_format format of the colour buffer
268 * @param type data type of the pixel vector
269 * @param rt render target index
270 * @param src blend src
271 * @param dst blend dst
272 * @param mask optional mask to apply to the blending result
273 * @param const_ const blend color
274 * @param swizzle swizzle values for RGBA
275 *
276 * @return the result of blending src and dst
277 */
278 LLVMValueRef
279 lp_build_blend_aos(struct gallivm_state *gallivm,
280 const struct pipe_blend_state *blend,
281 const enum pipe_format cbuf_format,
282 struct lp_type type,
283 unsigned rt,
284 LLVMValueRef src,
285 LLVMValueRef src_alpha,
286 LLVMValueRef dst,
287 LLVMValueRef mask,
288 LLVMValueRef const_,
289 LLVMValueRef const_alpha,
290 const unsigned char swizzle[4],
291 int nr_channels)
292 {
293 const struct pipe_rt_blend_state * state = &blend->rt[rt];
294 const struct util_format_description * desc;
295 struct lp_build_blend_aos_context bld;
296 LLVMValueRef src_factor, dst_factor;
297 LLVMValueRef result;
298 unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
299 unsigned i;
300
301 desc = util_format_description(cbuf_format);
302
303 /* Setup build context */
304 memset(&bld, 0, sizeof bld);
305 lp_build_context_init(&bld.base, gallivm, type);
306 bld.src = src;
307 bld.dst = dst;
308 bld.const_ = const_;
309 bld.src_alpha = src_alpha;
310 bld.const_alpha = const_alpha;
311
312 /* Find the alpha channel if not provided seperately */
313 if (!src_alpha) {
314 for (i = 0; i < 4; ++i) {
315 if (swizzle[i] == 3) {
316 alpha_swizzle = i;
317 }
318 }
319 }
320
321 if (blend->logicop_enable) {
322 if(!type.floating) {
323 result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
324 }
325 else {
326 result = src;
327 }
328 } else if (!state->blend_enable) {
329 result = src;
330 } else {
331 boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
332
333 src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
334 state->alpha_src_factor,
335 alpha_swizzle,
336 nr_channels);
337
338 dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
339 state->alpha_dst_factor,
340 alpha_swizzle,
341 nr_channels);
342
343 result = lp_build_blend(&bld.base,
344 state->rgb_func,
345 state->rgb_src_factor,
346 state->rgb_dst_factor,
347 src,
348 dst,
349 src_factor,
350 dst_factor,
351 rgb_alpha_same,
352 false);
353
354 if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
355 LLVMValueRef alpha;
356
357 alpha = lp_build_blend(&bld.base,
358 state->alpha_func,
359 state->alpha_src_factor,
360 state->alpha_dst_factor,
361 src,
362 dst,
363 src_factor,
364 dst_factor,
365 rgb_alpha_same,
366 false);
367
368 result = lp_build_blend_swizzle(&bld,
369 result,
370 alpha,
371 LP_BUILD_BLEND_SWIZZLE_RGBA,
372 alpha_swizzle,
373 nr_channels);
374 }
375 }
376
377 /* Check if color mask is necessary */
378 if (!util_format_colormask_full(desc, state->colormask)) {
379 LLVMValueRef color_mask;
380
381 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
382 lp_build_name(color_mask, "color_mask");
383
384 /* Combine with input mask if necessary */
385 if (mask) {
386 /* We can be blending floating values but masks are always integer... */
387 unsigned floating = bld.base.type.floating;
388 bld.base.type.floating = 0;
389
390 mask = lp_build_and(&bld.base, color_mask, mask);
391
392 bld.base.type.floating = floating;
393 } else {
394 mask = color_mask;
395 }
396 }
397
398 /* Apply mask, if one exists */
399 if (mask) {
400 result = lp_build_select(&bld.base, mask, result, dst);
401 }
402
403 return result;
404 }