Merge commit 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_blend_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46 #include "pipe/p_state.h"
47 #include "util/u_debug.h"
48
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_blend.h"
55 #include "lp_bld_debug.h"
56
57
58 /**
59 * We may use the same values several times, so we keep them here to avoid
60 * recomputing them. Also reusing the values allows us to do simplifications
61 * that LLVM optimization passes wouldn't normally be able to do.
62 */
63 struct lp_build_blend_aos_context
64 {
65 struct lp_build_context base;
66
67 LLVMValueRef src;
68 LLVMValueRef dst;
69 LLVMValueRef const_;
70
71 LLVMValueRef inv_src;
72 LLVMValueRef inv_dst;
73 LLVMValueRef inv_const;
74 LLVMValueRef saturate;
75
76 LLVMValueRef rgb_src_factor;
77 LLVMValueRef alpha_src_factor;
78 LLVMValueRef rgb_dst_factor;
79 LLVMValueRef alpha_dst_factor;
80 };
81
82
83 static LLVMValueRef
84 lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
85 unsigned factor,
86 boolean alpha)
87 {
88 switch (factor) {
89 case PIPE_BLENDFACTOR_ZERO:
90 return bld->base.zero;
91 case PIPE_BLENDFACTOR_ONE:
92 return bld->base.one;
93 case PIPE_BLENDFACTOR_SRC_COLOR:
94 case PIPE_BLENDFACTOR_SRC_ALPHA:
95 return bld->src;
96 case PIPE_BLENDFACTOR_DST_COLOR:
97 case PIPE_BLENDFACTOR_DST_ALPHA:
98 return bld->dst;
99 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
100 if(alpha)
101 return bld->base.one;
102 else {
103 if(!bld->inv_dst)
104 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
105 if(!bld->saturate)
106 bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
107 return bld->saturate;
108 }
109 case PIPE_BLENDFACTOR_CONST_COLOR:
110 case PIPE_BLENDFACTOR_CONST_ALPHA:
111 return bld->const_;
112 case PIPE_BLENDFACTOR_SRC1_COLOR:
113 case PIPE_BLENDFACTOR_SRC1_ALPHA:
114 /* TODO */
115 assert(0);
116 return bld->base.zero;
117 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
118 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
119 if(!bld->inv_src)
120 bld->inv_src = lp_build_comp(&bld->base, bld->src);
121 return bld->inv_src;
122 case PIPE_BLENDFACTOR_INV_DST_COLOR:
123 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
124 if(!bld->inv_dst)
125 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
126 return bld->inv_dst;
127 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
128 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
129 if(!bld->inv_const)
130 bld->inv_const = lp_build_comp(&bld->base, bld->const_);
131 return bld->inv_const;
132 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
133 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
134 /* TODO */
135 assert(0);
136 return bld->base.zero;
137 default:
138 assert(0);
139 return bld->base.zero;
140 }
141 }
142
143
/**
 * How the channels of an unswizzled blend factor are to be arranged.
 */
enum lp_build_blend_swizzle {
   /** Use the channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /** Use the alpha channel for every channel (value 1). */
   LP_BUILD_BLEND_SWIZZLE_AAAA
};
148
149
150 /**
151 * How should we shuffle the base factor.
152 */
153 static enum lp_build_blend_swizzle
154 lp_build_blend_factor_swizzle(unsigned factor)
155 {
156 switch (factor) {
157 case PIPE_BLENDFACTOR_ONE:
158 case PIPE_BLENDFACTOR_ZERO:
159 case PIPE_BLENDFACTOR_SRC_COLOR:
160 case PIPE_BLENDFACTOR_DST_COLOR:
161 case PIPE_BLENDFACTOR_CONST_COLOR:
162 case PIPE_BLENDFACTOR_SRC1_COLOR:
163 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
164 case PIPE_BLENDFACTOR_INV_DST_COLOR:
165 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
166 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
167 return LP_BUILD_BLEND_SWIZZLE_RGBA;
168 case PIPE_BLENDFACTOR_SRC_ALPHA:
169 case PIPE_BLENDFACTOR_DST_ALPHA:
170 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
171 case PIPE_BLENDFACTOR_SRC1_ALPHA:
172 case PIPE_BLENDFACTOR_CONST_ALPHA:
173 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
174 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
175 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
176 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
177 return LP_BUILD_BLEND_SWIZZLE_AAAA;
178 default:
179 assert(0);
180 return LP_BUILD_BLEND_SWIZZLE_RGBA;
181 }
182 }
183
184
185 static LLVMValueRef
186 lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
187 LLVMValueRef rgb,
188 LLVMValueRef alpha,
189 enum lp_build_blend_swizzle rgb_swizzle,
190 unsigned alpha_swizzle)
191 {
192 if(rgb == alpha) {
193 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
194 return rgb;
195 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
196 return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
197 }
198 else {
199 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
200 boolean cond[4] = {0, 0, 0, 0};
201 cond[alpha_swizzle] = 1;
202 return lp_build_select_aos(&bld->base, alpha, rgb, cond);
203 }
204 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
205 unsigned char swizzle[4];
206 swizzle[0] = alpha_swizzle;
207 swizzle[1] = alpha_swizzle;
208 swizzle[2] = alpha_swizzle;
209 swizzle[3] = alpha_swizzle;
210 swizzle[alpha_swizzle] += 4;
211 return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
212 }
213 }
214 assert(0);
215 return bld->base.undef;
216 }
217
218
219 /**
220 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
221 */
222 static LLVMValueRef
223 lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
224 LLVMValueRef factor1,
225 unsigned rgb_factor,
226 unsigned alpha_factor,
227 unsigned alpha_swizzle)
228 {
229 LLVMValueRef rgb_factor_;
230 LLVMValueRef alpha_factor_;
231 LLVMValueRef factor2;
232 enum lp_build_blend_swizzle rgb_swizzle;
233
234 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
235 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
236
237 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
238
239 factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
240
241 return lp_build_mul(&bld->base, factor1, factor2);
242 }
243
244
245 boolean
246 lp_build_blend_func_commutative(unsigned func)
247 {
248 switch (func) {
249 case PIPE_BLEND_ADD:
250 case PIPE_BLEND_MIN:
251 case PIPE_BLEND_MAX:
252 return TRUE;
253 case PIPE_BLEND_SUBTRACT:
254 case PIPE_BLEND_REVERSE_SUBTRACT:
255 return FALSE;
256 default:
257 assert(0);
258 return TRUE;
259 }
260 }
261
262
263 boolean
264 lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
265 {
266 if(rgb_func == alpha_func)
267 return FALSE;
268 if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
269 return TRUE;
270 if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
271 return TRUE;
272 return FALSE;
273 }
274
275
276 /**
277 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
278 */
279 LLVMValueRef
280 lp_build_blend_func(struct lp_build_context *bld,
281 unsigned func,
282 LLVMValueRef term1,
283 LLVMValueRef term2)
284 {
285 switch (func) {
286 case PIPE_BLEND_ADD:
287 return lp_build_add(bld, term1, term2);
288 break;
289 case PIPE_BLEND_SUBTRACT:
290 return lp_build_sub(bld, term1, term2);
291 case PIPE_BLEND_REVERSE_SUBTRACT:
292 return lp_build_sub(bld, term2, term1);
293 case PIPE_BLEND_MIN:
294 return lp_build_min(bld, term1, term2);
295 case PIPE_BLEND_MAX:
296 return lp_build_max(bld, term1, term2);
297 default:
298 assert(0);
299 return bld->zero;
300 }
301 }
302
303
304 LLVMValueRef
305 lp_build_blend_aos(LLVMBuilderRef builder,
306 const struct pipe_blend_state *blend,
307 struct lp_type type,
308 LLVMValueRef src,
309 LLVMValueRef dst,
310 LLVMValueRef const_,
311 unsigned alpha_swizzle)
312 {
313 struct lp_build_blend_aos_context bld;
314 LLVMValueRef src_term;
315 LLVMValueRef dst_term;
316
317 /* FIXME */
318 assert(blend->independent_blend_enable == 0);
319 assert(blend->rt[0].colormask == 0xf);
320
321 if(!blend->rt[0].blend_enable)
322 return src;
323
324 /* It makes no sense to blend unless values are normalized */
325 assert(type.norm);
326
327 /* Setup build context */
328 memset(&bld, 0, sizeof bld);
329 lp_build_context_init(&bld.base, builder, type);
330 bld.src = src;
331 bld.dst = dst;
332 bld.const_ = const_;
333
334 /* TODO: There are still a few optimization opportunities here. For certain
335 * combinations it is possible to reorder the operations and therefore saving
336 * some instructions. */
337
338 src_term = lp_build_blend_factor(&bld, src, blend->rt[0].rgb_src_factor,
339 blend->rt[0].alpha_src_factor, alpha_swizzle);
340 dst_term = lp_build_blend_factor(&bld, dst, blend->rt[0].rgb_dst_factor,
341 blend->rt[0].alpha_dst_factor, alpha_swizzle);
342
343 lp_build_name(src_term, "src_term");
344 lp_build_name(dst_term, "dst_term");
345
346 if(blend->rt[0].rgb_func == blend->rt[0].alpha_func) {
347 return lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term);
348 }
349 else {
350 /* Seperate RGB / A functions */
351
352 LLVMValueRef rgb;
353 LLVMValueRef alpha;
354
355 rgb = lp_build_blend_func(&bld.base, blend->rt[0].rgb_func, src_term, dst_term);
356 alpha = lp_build_blend_func(&bld.base, blend->rt[0].alpha_func, src_term, dst_term);
357
358 return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
359 }
360 }