Rewrite blending in terms of SIMD operations.
[mesa.git] / src / mesa / pipe / softpipe / sp_quad_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * quad blending
30 * \author Brian Paul
31 */
32
33 #include "glheader.h"
34 #include "imports.h"
35 #include "macros.h"
36 #include "pipe/p_defines.h"
37 #include "sp_context.h"
38 #include "sp_headers.h"
39 #include "sp_surface.h"
40 #include "sp_quad.h"
41
42
/**
 * Copy four consecutive elements from SRC to DST.
 * Both arguments are array/pointer expressions; each is parenthesized so
 * that expressions such as "base + 4" index correctly.
 */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while (0)
50
/**
 * Store the scalar value SRC into four consecutive elements of DST.
 * Note: SRC is evaluated once per element (four times), so it must not
 * have side effects.
 */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while (0)
58
/**
 * Element-wise addition of two 4-element vectors: SUM[i] = A[i] + B[i].
 * All arguments are array/pointer expressions and are parenthesized so
 * that compound expressions index correctly.
 */
#define VEC4_ADD(SUM, A, B) \
do { \
   (SUM)[0] = (A)[0] + (B)[0]; \
   (SUM)[1] = (A)[1] + (B)[1]; \
   (SUM)[2] = (A)[2] + (B)[2]; \
   (SUM)[3] = (A)[3] + (B)[3]; \
} while (0)
66
/**
 * Element-wise subtraction of two 4-element vectors: DST[i] = A[i] - B[i].
 * All arguments are array/pointer expressions and are parenthesized so
 * that compound expressions index correctly.
 */
#define VEC4_SUB(DST, A, B) \
do { \
   (DST)[0] = (A)[0] - (B)[0]; \
   (DST)[1] = (A)[1] - (B)[1]; \
   (DST)[2] = (A)[2] - (B)[2]; \
   (DST)[3] = (A)[3] - (B)[3]; \
} while (0)
74
/**
 * Element-wise product of two 4-element vectors: DST[i] = A[i] * B[i].
 * All arguments are array/pointer expressions and are parenthesized so
 * that compound expressions index correctly.
 */
#define VEC4_MUL(DST, A, B) \
do { \
   (DST)[0] = (A)[0] * (B)[0]; \
   (DST)[1] = (A)[1] * (B)[1]; \
   (DST)[2] = (A)[2] * (B)[2]; \
   (DST)[3] = (A)[3] * (B)[3]; \
} while (0)
82
/**
 * Element-wise minimum of two 4-element vectors:
 * DST[i] = (A[i] < B[i]) ? A[i] : B[i].
 * All arguments are array/pointer expressions and are parenthesized so
 * that compound expressions index correctly.
 */
#define VEC4_MIN(DST, A, B) \
do { \
   (DST)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (DST)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (DST)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (DST)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
90
/**
 * Element-wise maximum of two 4-element vectors:
 * DST[i] = (A[i] > B[i]) ? A[i] : B[i].
 * All arguments are array/pointer expressions and are parenthesized so
 * that compound expressions index correctly.
 */
#define VEC4_MAX(DST, A, B) \
do { \
   (DST)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (DST)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (DST)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (DST)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
98
99
100
101 static void
102 blend_quad(struct quad_stage *qs, struct quad_header *quad)
103 {
104 static const GLfloat zero[4] = { 0, 0, 0, 0 };
105 static const GLfloat one[4] = { 1, 1, 1, 1 };
106 struct softpipe_context *softpipe = qs->softpipe;
107 GLfloat source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
108 GLuint i;
109
110 /* XXX we're also looping in output_quad() !?! */
111
112 for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
113 struct softpipe_surface *sps
114 = softpipe_surface(softpipe->framebuffer.cbufs[i]);
115
116 sps->read_quad_f_swz(sps, quad->x0, quad->y0, dest);
117
118 /*
119 * Compute src/first term RGB
120 */
121 switch (softpipe->blend.rgb_src_factor) {
122 case PIPE_BLENDFACTOR_ONE:
123 VEC4_COPY(source[0], quad->outputs.color[0]); /* R */
124 VEC4_COPY(source[1], quad->outputs.color[1]); /* G */
125 VEC4_COPY(source[2], quad->outputs.color[2]); /* B */
126 break;
127 case PIPE_BLENDFACTOR_SRC_COLOR:
128 VEC4_MUL(source[0], quad->outputs.color[0], quad->outputs.color[0]); /* R */
129 VEC4_MUL(source[1], quad->outputs.color[1], quad->outputs.color[1]); /* G */
130 VEC4_MUL(source[2], quad->outputs.color[2], quad->outputs.color[2]); /* B */
131 break;
132 case PIPE_BLENDFACTOR_SRC_ALPHA:
133 {
134 const GLfloat *alpha = quad->outputs.color[3];
135 VEC4_MUL(source[0], quad->outputs.color[0], alpha); /* R */
136 VEC4_MUL(source[1], quad->outputs.color[1], alpha); /* G */
137 VEC4_MUL(source[2], quad->outputs.color[2], alpha); /* B */
138 }
139 break;
140 case PIPE_BLENDFACTOR_DST_COLOR:
141 VEC4_MUL(source[0], quad->outputs.color[0], dest[0]); /* R */
142 VEC4_MUL(source[1], quad->outputs.color[1], dest[1]); /* G */
143 VEC4_MUL(source[2], quad->outputs.color[2], dest[2]); /* B */
144 break;
145 case PIPE_BLENDFACTOR_DST_ALPHA:
146 {
147 const GLfloat *alpha = dest[3];
148 VEC4_MUL(source[0], quad->outputs.color[0], alpha); /* R */
149 VEC4_MUL(source[1], quad->outputs.color[1], alpha); /* G */
150 VEC4_MUL(source[2], quad->outputs.color[2], alpha); /* B */
151 }
152 break;
153 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
154 assert(0); /* to do */
155 break;
156 case PIPE_BLENDFACTOR_CONST_COLOR:
157 {
158 GLfloat comp[4];
159 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
160 VEC4_MUL(source[0], quad->outputs.color[0], comp); /* R */
161 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
162 VEC4_MUL(source[1], quad->outputs.color[1], comp); /* G */
163 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
164 VEC4_MUL(source[2], quad->outputs.color[2], comp); /* B */
165 }
166 break;
167 case PIPE_BLENDFACTOR_CONST_ALPHA:
168 {
169 GLfloat alpha[4];
170 VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
171 VEC4_MUL(source[0], quad->outputs.color[0], alpha); /* R */
172 VEC4_MUL(source[1], quad->outputs.color[1], alpha); /* G */
173 VEC4_MUL(source[2], quad->outputs.color[2], alpha); /* B */
174 }
175 break;
176 case PIPE_BLENDFACTOR_SRC1_COLOR:
177 assert(0); /* to do */
178 break;
179 case PIPE_BLENDFACTOR_SRC1_ALPHA:
180 assert(0); /* to do */
181 break;
182 case PIPE_BLENDFACTOR_ZERO:
183 VEC4_COPY(source[0], zero); /* R */
184 VEC4_COPY(source[1], zero); /* G */
185 VEC4_COPY(source[2], zero); /* B */
186 break;
187 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
188 {
189 GLfloat inv_comp[4];
190 VEC4_SUB(inv_comp, one, quad->outputs.color[0]); /* R */
191 VEC4_MUL(source[0], quad->outputs.color[0], inv_comp); /* R */
192 VEC4_SUB(inv_comp, one, quad->outputs.color[1]); /* G */
193 VEC4_MUL(source[1], quad->outputs.color[1], inv_comp); /* G */
194 VEC4_SUB(inv_comp, one, quad->outputs.color[2]); /* B */
195 VEC4_MUL(source[2], quad->outputs.color[2], inv_comp); /* B */
196 }
197 break;
198 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
199 {
200 GLfloat inv_alpha[4];
201 VEC4_SUB(inv_alpha, one, quad->outputs.color[3]);
202 VEC4_MUL(source[0], quad->outputs.color[0], inv_alpha); /* R */
203 VEC4_MUL(source[1], quad->outputs.color[1], inv_alpha); /* G */
204 VEC4_MUL(source[2], quad->outputs.color[2], inv_alpha); /* B */
205 }
206 break;
207 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
208 {
209 GLfloat inv_alpha[4];
210 VEC4_SUB(inv_alpha, one, dest[3]);
211 VEC4_MUL(source[0], quad->outputs.color[0], inv_alpha); /* R */
212 VEC4_MUL(source[1], quad->outputs.color[1], inv_alpha); /* G */
213 VEC4_MUL(source[2], quad->outputs.color[2], inv_alpha); /* B */
214 }
215 break;
216 case PIPE_BLENDFACTOR_INV_DST_COLOR:
217 {
218 GLfloat inv_comp[4];
219 VEC4_SUB(inv_comp, one, dest[0]); /* R */
220 VEC4_MUL(source[0], quad->outputs.color[0], inv_comp); /* R */
221 VEC4_SUB(inv_comp, one, dest[1]); /* G */
222 VEC4_MUL(source[1], quad->outputs.color[1], inv_comp); /* G */
223 VEC4_SUB(inv_comp, one, dest[2]); /* B */
224 VEC4_MUL(source[2], quad->outputs.color[2], inv_comp); /* B */
225 }
226 break;
227 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
228 {
229 GLfloat inv_comp[4];
230 /* R */
231 VEC4_SCALAR(inv_comp, 1.0 - softpipe->blend_color.color[0]);
232 VEC4_MUL(source[0], quad->outputs.color[0], inv_comp);
233 /* G */
234 VEC4_SCALAR(inv_comp, 1.0 - softpipe->blend_color.color[1]);
235 VEC4_MUL(source[1], quad->outputs.color[1], inv_comp);
236 /* B */
237 VEC4_SCALAR(inv_comp, 1.0 - softpipe->blend_color.color[2]);
238 VEC4_MUL(source[2], quad->outputs.color[2], inv_comp);
239 }
240 break;
241 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
242 {
243 GLfloat alpha[4], inv_alpha[4];
244 VEC4_SCALAR(alpha, 1.0 - softpipe->blend_color.color[3]);
245 VEC4_MUL(source[0], quad->outputs.color[0], inv_alpha); /* R */
246 VEC4_MUL(source[1], quad->outputs.color[1], inv_alpha); /* G */
247 VEC4_MUL(source[2], quad->outputs.color[2], inv_alpha); /* B */
248 }
249 break;
250 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
251 assert(0); /* to do */
252 break;
253 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
254 assert(0); /* to do */
255 break;
256 default:
257 abort();
258 }
259
260 /*
261 * Compute src/first term A
262 */
263 switch (softpipe->blend.alpha_src_factor) {
264 case PIPE_BLENDFACTOR_ONE:
265 VEC4_COPY(source[3], quad->outputs.color[3]); /* A */
266 break;
267 case PIPE_BLENDFACTOR_SRC_ALPHA:
268 {
269 const GLfloat *alpha = quad->outputs.color[3];
270 VEC4_MUL(source[3], quad->outputs.color[3], alpha); /* A */
271 }
272 break;
273 case PIPE_BLENDFACTOR_ZERO:
274 VEC4_COPY(source[3], zero); /* A */
275 break;
276 /* XXX fill in remaining terms */
277 default:
278 abort();
279 }
280
281
282 /*
283 * Compute dest/second term RGB
284 */
285 switch (softpipe->blend.rgb_dst_factor) {
286 case PIPE_BLENDFACTOR_ONE:
287 /* dest = dest * 1 NO-OP, leave dest as-is */
288 break;
289 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
290 {
291 GLfloat one_minus_alpha[QUAD_SIZE];
292 VEC4_SUB(one_minus_alpha, one, quad->outputs.color[3]);
293 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
294 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
295 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
296 }
297 break;
298 case PIPE_BLENDFACTOR_ZERO:
299 VEC4_COPY(dest[0], zero); /* R */
300 VEC4_COPY(dest[1], zero); /* G */
301 VEC4_COPY(dest[2], zero); /* B */
302 break;
303 /* XXX fill in remaining terms */
304 default:
305 abort();
306 }
307
308 /*
309 * Compute dest/second term A
310 */
311 switch (softpipe->blend.alpha_dst_factor) {
312 case PIPE_BLENDFACTOR_ONE:
313 /* dest = dest * 1 NO-OP, leave dest as-is */
314 break;
315 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
316 {
317 GLfloat one_minus_alpha[QUAD_SIZE];
318 VEC4_SUB(one_minus_alpha, one, quad->outputs.color[3]);
319 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
320 }
321 break;
322 case PIPE_BLENDFACTOR_ZERO:
323 VEC4_COPY(dest[3], zero); /* A */
324 break;
325 /* XXX fill in remaining terms */
326 default:
327 abort();
328 }
329
330 /*
331 * Combine RGB terms
332 */
333 switch (softpipe->blend.rgb_func) {
334 case PIPE_BLEND_ADD:
335 VEC4_ADD(quad->outputs.color[0], source[0], dest[0]); /* R */
336 VEC4_ADD(quad->outputs.color[1], source[1], dest[1]); /* G */
337 VEC4_ADD(quad->outputs.color[2], source[2], dest[2]); /* B */
338 break;
339 case PIPE_BLEND_SUBTRACT:
340 VEC4_SUB(quad->outputs.color[0], source[0], dest[0]); /* R */
341 VEC4_SUB(quad->outputs.color[1], source[1], dest[1]); /* G */
342 VEC4_SUB(quad->outputs.color[2], source[2], dest[2]); /* B */
343 break;
344 case PIPE_BLEND_REVERSE_SUBTRACT:
345 VEC4_SUB(quad->outputs.color[0], dest[0], source[0]); /* R */
346 VEC4_SUB(quad->outputs.color[1], dest[1], source[1]); /* G */
347 VEC4_SUB(quad->outputs.color[2], dest[2], source[2]); /* B */
348 break;
349 case PIPE_BLEND_MIN:
350 VEC4_MIN(quad->outputs.color[0], source[0], dest[0]); /* R */
351 VEC4_MIN(quad->outputs.color[1], source[1], dest[1]); /* G */
352 VEC4_MIN(quad->outputs.color[2], source[2], dest[2]); /* B */
353 break;
354 case PIPE_BLEND_MAX:
355 VEC4_MAX(quad->outputs.color[0], source[0], dest[0]); /* R */
356 VEC4_MAX(quad->outputs.color[1], source[1], dest[1]); /* G */
357 VEC4_MAX(quad->outputs.color[2], source[2], dest[2]); /* B */
358 break;
359 default:
360 abort();
361 }
362
363 /*
364 * Combine A terms
365 */
366 switch (softpipe->blend.alpha_func) {
367 case PIPE_BLEND_ADD:
368 VEC4_ADD(quad->outputs.color[3], source[3], dest[3]); /* A */
369 break;
370 case PIPE_BLEND_SUBTRACT:
371 VEC4_SUB(quad->outputs.color[3], source[3], dest[3]); /* A */
372 break;
373 case PIPE_BLEND_REVERSE_SUBTRACT:
374 VEC4_SUB(quad->outputs.color[3], dest[3], source[3]); /* A */
375 break;
376 case PIPE_BLEND_MIN:
377 VEC4_MIN(quad->outputs.color[3], source[3], dest[3]); /* A */
378 break;
379 case PIPE_BLEND_MAX:
380 VEC4_MAX(quad->outputs.color[3], source[3], dest[3]); /* A */
381 default:
382 abort();
383 }
384
385 /* pass blended quad to next stage */
386 qs->next->run(qs->next, quad);
387 }
388 }
389
390
391
392
393 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
394 {
395 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
396
397 stage->softpipe = softpipe;
398 stage->run = blend_quad;
399
400 return stage;
401 }