r300: Zero-initialize register for NV_vertex_program
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_blend_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46 #include "pipe/p_state.h"
47
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_swizzle.h"
53 #include "lp_bld_blend.h"
54 #include "lp_bld_debug.h"
55
56
57 /**
58 * We may use the same values several times, so we keep them here to avoid
59 * recomputing them. Also reusing the values allows us to do simplifications
60 * that LLVM optimization passes wouldn't normally be able to do.
61 */
62 struct lp_build_blend_aos_context
63 {
64 struct lp_build_context base;
65
66 LLVMValueRef src;
67 LLVMValueRef dst;
68 LLVMValueRef const_;
69
70 LLVMValueRef inv_src;
71 LLVMValueRef inv_dst;
72 LLVMValueRef inv_const;
73 LLVMValueRef saturate;
74
75 LLVMValueRef rgb_src_factor;
76 LLVMValueRef alpha_src_factor;
77 LLVMValueRef rgb_dst_factor;
78 LLVMValueRef alpha_dst_factor;
79 };
80
81
82 static LLVMValueRef
83 lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
84 unsigned factor,
85 boolean alpha)
86 {
87 switch (factor) {
88 case PIPE_BLENDFACTOR_ZERO:
89 return bld->base.zero;
90 case PIPE_BLENDFACTOR_ONE:
91 return bld->base.one;
92 case PIPE_BLENDFACTOR_SRC_COLOR:
93 case PIPE_BLENDFACTOR_SRC_ALPHA:
94 return bld->src;
95 case PIPE_BLENDFACTOR_DST_COLOR:
96 case PIPE_BLENDFACTOR_DST_ALPHA:
97 return bld->dst;
98 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
99 if(alpha)
100 return bld->base.one;
101 else {
102 if(!bld->inv_dst)
103 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
104 if(!bld->saturate)
105 bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
106 return bld->saturate;
107 }
108 case PIPE_BLENDFACTOR_CONST_COLOR:
109 case PIPE_BLENDFACTOR_CONST_ALPHA:
110 return bld->const_;
111 case PIPE_BLENDFACTOR_SRC1_COLOR:
112 case PIPE_BLENDFACTOR_SRC1_ALPHA:
113 /* TODO */
114 assert(0);
115 return bld->base.zero;
116 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
117 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
118 if(!bld->inv_src)
119 bld->inv_src = lp_build_comp(&bld->base, bld->src);
120 return bld->inv_src;
121 case PIPE_BLENDFACTOR_INV_DST_COLOR:
122 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
123 if(!bld->inv_dst)
124 bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
125 return bld->inv_dst;
126 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
127 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
128 if(!bld->inv_const)
129 bld->inv_const = lp_build_comp(&bld->base, bld->const_);
130 return bld->inv_const;
131 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
132 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
133 /* TODO */
134 assert(0);
135 return bld->base.zero;
136 default:
137 assert(0);
138 return bld->base.zero;
139 }
140 }
141
142
/**
 * Channel arrangement applied to the value a blend factor is based on.
 */
enum lp_build_blend_swizzle {
   /* Use the channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /* Use the alpha channel for every channel. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
147
148
149 /**
150 * How should we shuffle the base factor.
151 */
152 static enum lp_build_blend_swizzle
153 lp_build_blend_factor_swizzle(unsigned factor)
154 {
155 switch (factor) {
156 case PIPE_BLENDFACTOR_ONE:
157 case PIPE_BLENDFACTOR_ZERO:
158 case PIPE_BLENDFACTOR_SRC_COLOR:
159 case PIPE_BLENDFACTOR_DST_COLOR:
160 case PIPE_BLENDFACTOR_CONST_COLOR:
161 case PIPE_BLENDFACTOR_SRC1_COLOR:
162 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
163 case PIPE_BLENDFACTOR_INV_DST_COLOR:
164 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
165 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
166 return LP_BUILD_BLEND_SWIZZLE_RGBA;
167 case PIPE_BLENDFACTOR_SRC_ALPHA:
168 case PIPE_BLENDFACTOR_DST_ALPHA:
169 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
170 case PIPE_BLENDFACTOR_SRC1_ALPHA:
171 case PIPE_BLENDFACTOR_CONST_ALPHA:
172 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
173 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
174 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
175 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
176 return LP_BUILD_BLEND_SWIZZLE_AAAA;
177 default:
178 assert(0);
179 return LP_BUILD_BLEND_SWIZZLE_RGBA;
180 }
181 }
182
183
184 static LLVMValueRef
185 lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
186 LLVMValueRef rgb,
187 LLVMValueRef alpha,
188 enum lp_build_blend_swizzle rgb_swizzle,
189 unsigned alpha_swizzle)
190 {
191 if(rgb == alpha) {
192 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
193 return rgb;
194 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
195 return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
196 }
197 else {
198 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
199 boolean cond[4] = {0, 0, 0, 0};
200 cond[alpha_swizzle] = 1;
201 return lp_build_select_aos(&bld->base, alpha, rgb, cond);
202 }
203 if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
204 unsigned char swizzle[4];
205 swizzle[0] = alpha_swizzle;
206 swizzle[1] = alpha_swizzle;
207 swizzle[2] = alpha_swizzle;
208 swizzle[3] = alpha_swizzle;
209 swizzle[alpha_swizzle] += 4;
210 return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
211 }
212 }
213 assert(0);
214 return bld->base.undef;
215 }
216
217
218 /**
219 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
220 */
221 static LLVMValueRef
222 lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
223 LLVMValueRef factor1,
224 unsigned rgb_factor,
225 unsigned alpha_factor,
226 unsigned alpha_swizzle)
227 {
228 LLVMValueRef rgb_factor_;
229 LLVMValueRef alpha_factor_;
230 LLVMValueRef factor2;
231 enum lp_build_blend_swizzle rgb_swizzle;
232
233 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
234 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
235
236 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
237
238 factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
239
240 return lp_build_mul(&bld->base, factor1, factor2);
241 }
242
243
244 boolean
245 lp_build_blend_func_commutative(unsigned func)
246 {
247 switch (func) {
248 case PIPE_BLEND_ADD:
249 case PIPE_BLEND_MIN:
250 case PIPE_BLEND_MAX:
251 return TRUE;
252 case PIPE_BLEND_SUBTRACT:
253 case PIPE_BLEND_REVERSE_SUBTRACT:
254 return FALSE;
255 default:
256 assert(0);
257 return TRUE;
258 }
259 }
260
261
262 boolean
263 lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
264 {
265 if(rgb_func == alpha_func)
266 return FALSE;
267 if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
268 return TRUE;
269 if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
270 return TRUE;
271 return FALSE;
272 }
273
274
275 /**
276 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
277 */
278 LLVMValueRef
279 lp_build_blend_func(struct lp_build_context *bld,
280 unsigned func,
281 LLVMValueRef term1,
282 LLVMValueRef term2)
283 {
284 switch (func) {
285 case PIPE_BLEND_ADD:
286 return lp_build_add(bld, term1, term2);
287 break;
288 case PIPE_BLEND_SUBTRACT:
289 return lp_build_sub(bld, term1, term2);
290 case PIPE_BLEND_REVERSE_SUBTRACT:
291 return lp_build_sub(bld, term2, term1);
292 case PIPE_BLEND_MIN:
293 return lp_build_min(bld, term1, term2);
294 case PIPE_BLEND_MAX:
295 return lp_build_max(bld, term1, term2);
296 default:
297 assert(0);
298 return bld->zero;
299 }
300 }
301
302
303 LLVMValueRef
304 lp_build_blend_aos(LLVMBuilderRef builder,
305 const struct pipe_blend_state *blend,
306 union lp_type type,
307 LLVMValueRef src,
308 LLVMValueRef dst,
309 LLVMValueRef const_,
310 unsigned alpha_swizzle)
311 {
312 struct lp_build_blend_aos_context bld;
313 LLVMValueRef src_term;
314 LLVMValueRef dst_term;
315
316 /* FIXME */
317 assert(blend->colormask == 0xf);
318
319 if(!blend->blend_enable)
320 return src;
321
322 /* It makes no sense to blend unless values are normalized */
323 assert(type.norm);
324
325 /* Setup build context */
326 memset(&bld, 0, sizeof bld);
327 lp_build_context_init(&bld.base, builder, type);
328 bld.src = src;
329 bld.dst = dst;
330 bld.const_ = const_;
331
332 /* TODO: There are still a few optimization opportunities here. For certain
333 * combinations it is possible to reorder the operations and therefore saving
334 * some instructions. */
335
336 src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
337 dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
338
339 lp_build_name(src_term, "src_term");
340 lp_build_name(dst_term, "dst_term");
341
342 if(blend->rgb_func == blend->alpha_func) {
343 return lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term);
344 }
345 else {
346 /* Seperate RGB / A functions */
347
348 LLVMValueRef rgb;
349 LLVMValueRef alpha;
350
351 rgb = lp_build_blend_func(&bld.base, blend->rgb_func, src_term, dst_term);
352 alpha = lp_build_blend_func(&bld.base, blend->alpha_func, src_term, dst_term);
353
354 return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
355 }
356 }