Merge remote branch 'vdpau/pipe-video' into pipe-video
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * quad blending
30 * \author Brian Paul
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/u_format.h"
37 #include "sp_context.h"
38 #include "sp_state.h"
39 #include "sp_quad.h"
40 #include "sp_tile_cache.h"
41 #include "sp_quad_pipe.h"
42
43
/*
 * Element-wise helpers for 4-element float vectors.
 *
 * Every macro argument is parenthesized so that arbitrary expressions
 * (for example pointer arithmetic such as "base + 4") can be passed safely.
 * Arguments are evaluated more than once, so they must not have side
 * effects.
 */

/** DST[i] = SRC[i] */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while (0)

/** DST[i] = SRC, where SRC is a scalar expression (evaluated four times) */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while (0)

/** R[i] = A[i] + B[i] */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R[i] = A[i] - B[i] */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R[i] = A[i] * B[i] */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R[i] = smaller of A[i] and B[i] (B[i] when they compare equal) */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R[i] = larger of A[i] and B[i] (B[i] when they compare equal) */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
117
118
119
120 static void
121 logicop_quad(struct quad_stage *qs,
122 float (*quadColor)[4],
123 float (*dest)[4])
124 {
125 struct softpipe_context *softpipe = qs->softpipe;
126 ubyte src[4][4], dst[4][4], res[4][4];
127 uint *src4 = (uint *) src;
128 uint *dst4 = (uint *) dst;
129 uint *res4 = (uint *) res;
130 uint j;
131
132
133 /* convert to ubyte */
134 for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
135 dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
136 dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
137 dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
138 dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
139
140 src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
141 src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
142 src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
143 src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
144 }
145
146 switch (softpipe->blend->logicop_func) {
147 case PIPE_LOGICOP_CLEAR:
148 for (j = 0; j < 4; j++)
149 res4[j] = 0;
150 break;
151 case PIPE_LOGICOP_NOR:
152 for (j = 0; j < 4; j++)
153 res4[j] = ~(src4[j] | dst4[j]);
154 break;
155 case PIPE_LOGICOP_AND_INVERTED:
156 for (j = 0; j < 4; j++)
157 res4[j] = ~src4[j] & dst4[j];
158 break;
159 case PIPE_LOGICOP_COPY_INVERTED:
160 for (j = 0; j < 4; j++)
161 res4[j] = ~src4[j];
162 break;
163 case PIPE_LOGICOP_AND_REVERSE:
164 for (j = 0; j < 4; j++)
165 res4[j] = src4[j] & ~dst4[j];
166 break;
167 case PIPE_LOGICOP_INVERT:
168 for (j = 0; j < 4; j++)
169 res4[j] = ~dst4[j];
170 break;
171 case PIPE_LOGICOP_XOR:
172 for (j = 0; j < 4; j++)
173 res4[j] = dst4[j] ^ src4[j];
174 break;
175 case PIPE_LOGICOP_NAND:
176 for (j = 0; j < 4; j++)
177 res4[j] = ~(src4[j] & dst4[j]);
178 break;
179 case PIPE_LOGICOP_AND:
180 for (j = 0; j < 4; j++)
181 res4[j] = src4[j] & dst4[j];
182 break;
183 case PIPE_LOGICOP_EQUIV:
184 for (j = 0; j < 4; j++)
185 res4[j] = ~(src4[j] ^ dst4[j]);
186 break;
187 case PIPE_LOGICOP_NOOP:
188 for (j = 0; j < 4; j++)
189 res4[j] = dst4[j];
190 break;
191 case PIPE_LOGICOP_OR_INVERTED:
192 for (j = 0; j < 4; j++)
193 res4[j] = ~src4[j] | dst4[j];
194 break;
195 case PIPE_LOGICOP_COPY:
196 for (j = 0; j < 4; j++)
197 res4[j] = src4[j];
198 break;
199 case PIPE_LOGICOP_OR_REVERSE:
200 for (j = 0; j < 4; j++)
201 res4[j] = src4[j] | ~dst4[j];
202 break;
203 case PIPE_LOGICOP_OR:
204 for (j = 0; j < 4; j++)
205 res4[j] = src4[j] | dst4[j];
206 break;
207 case PIPE_LOGICOP_SET:
208 for (j = 0; j < 4; j++)
209 res4[j] = ~0;
210 break;
211 default:
212 assert(0 && "invalid logicop mode");
213 }
214
215 for (j = 0; j < 4; j++) {
216 quadColor[j][0] = ubyte_to_float(res[j][0]);
217 quadColor[j][1] = ubyte_to_float(res[j][1]);
218 quadColor[j][2] = ubyte_to_float(res[j][2]);
219 quadColor[j][3] = ubyte_to_float(res[j][3]);
220 }
221 }
222
223
224
225 /**
226 * Do blending for a 2x2 quad for one color buffer.
227 * \param quadColor the incoming quad colors
228 * \param dest the destination/framebuffer quad colors
229 * \param blend_index which set of blending terms to use
230 * \param has_dst_alpha does the dest color buffer have an alpha channel?
231 */
232 static void
233 blend_quad(struct quad_stage *qs,
234 float (*quadColor)[4],
235 float (*dest)[4],
236 unsigned blend_index,
237 boolean has_dst_alpha)
238 {
239 static const float zero[4] = { 0, 0, 0, 0 };
240 static const float one[4] = { 1, 1, 1, 1 };
241 struct softpipe_context *softpipe = qs->softpipe;
242 float source[4][QUAD_SIZE] = { { 0 } };
243
244 /*
245 * Compute src/first term RGB
246 */
247 switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
248 case PIPE_BLENDFACTOR_ONE:
249 VEC4_COPY(source[0], quadColor[0]); /* R */
250 VEC4_COPY(source[1], quadColor[1]); /* G */
251 VEC4_COPY(source[2], quadColor[2]); /* B */
252 break;
253 case PIPE_BLENDFACTOR_SRC_COLOR:
254 VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
255 VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
256 VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
257 break;
258 case PIPE_BLENDFACTOR_SRC_ALPHA:
259 {
260 const float *alpha = quadColor[3];
261 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
262 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
263 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
264 }
265 break;
266 case PIPE_BLENDFACTOR_DST_COLOR:
267 VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
268 VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
269 VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
270 break;
271 case PIPE_BLENDFACTOR_DST_ALPHA:
272 if (has_dst_alpha) {
273 const float *alpha = dest[3];
274 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
275 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
276 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
277 }
278 else {
279 VEC4_COPY(source[0], quadColor[0]); /* R */
280 VEC4_COPY(source[1], quadColor[1]); /* G */
281 VEC4_COPY(source[2], quadColor[2]); /* B */
282 }
283 break;
284 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
285 if (has_dst_alpha) {
286 const float *alpha = quadColor[3];
287 float diff[4], temp[4];
288 VEC4_SUB(diff, one, dest[3]);
289 VEC4_MIN(temp, alpha, diff);
290 VEC4_MUL(source[0], quadColor[0], temp); /* R */
291 VEC4_MUL(source[1], quadColor[1], temp); /* G */
292 VEC4_MUL(source[2], quadColor[2], temp); /* B */
293 }
294 else {
295 VEC4_COPY(source[0], zero); /* R */
296 VEC4_COPY(source[1], zero); /* G */
297 VEC4_COPY(source[2], zero); /* B */
298 }
299 break;
300 case PIPE_BLENDFACTOR_CONST_COLOR:
301 {
302 float comp[4];
303 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
304 VEC4_MUL(source[0], quadColor[0], comp); /* R */
305 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
306 VEC4_MUL(source[1], quadColor[1], comp); /* G */
307 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
308 VEC4_MUL(source[2], quadColor[2], comp); /* B */
309 }
310 break;
311 case PIPE_BLENDFACTOR_CONST_ALPHA:
312 {
313 float alpha[4];
314 VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
315 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
316 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
317 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
318 }
319 break;
320 case PIPE_BLENDFACTOR_SRC1_COLOR:
321 assert(0); /* to do */
322 break;
323 case PIPE_BLENDFACTOR_SRC1_ALPHA:
324 assert(0); /* to do */
325 break;
326 case PIPE_BLENDFACTOR_ZERO:
327 VEC4_COPY(source[0], zero); /* R */
328 VEC4_COPY(source[1], zero); /* G */
329 VEC4_COPY(source[2], zero); /* B */
330 break;
331 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
332 {
333 float inv_comp[4];
334 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
335 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
336 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
337 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
338 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
339 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
340 }
341 break;
342 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
343 {
344 float inv_alpha[4];
345 VEC4_SUB(inv_alpha, one, quadColor[3]);
346 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
347 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
348 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
349 }
350 break;
351 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
352 if (has_dst_alpha) {
353 float inv_alpha[4];
354 VEC4_SUB(inv_alpha, one, dest[3]);
355 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
356 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
357 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
358 }
359 else {
360 VEC4_COPY(source[0], zero); /* R */
361 VEC4_COPY(source[1], zero); /* G */
362 VEC4_COPY(source[2], zero); /* B */
363 }
364 break;
365 case PIPE_BLENDFACTOR_INV_DST_COLOR:
366 {
367 float inv_comp[4];
368 VEC4_SUB(inv_comp, one, dest[0]); /* R */
369 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
370 VEC4_SUB(inv_comp, one, dest[1]); /* G */
371 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
372 VEC4_SUB(inv_comp, one, dest[2]); /* B */
373 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
374 }
375 break;
376 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
377 {
378 float inv_comp[4];
379 /* R */
380 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
381 VEC4_MUL(source[0], quadColor[0], inv_comp);
382 /* G */
383 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
384 VEC4_MUL(source[1], quadColor[1], inv_comp);
385 /* B */
386 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
387 VEC4_MUL(source[2], quadColor[2], inv_comp);
388 }
389 break;
390 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
391 {
392 float inv_alpha[4];
393 VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
394 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
395 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
396 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
397 }
398 break;
399 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
400 assert(0); /* to do */
401 break;
402 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
403 assert(0); /* to do */
404 break;
405 default:
406 assert(0 && "invalid rgb src factor");
407 }
408
409 /*
410 * Compute src/first term A
411 */
412 switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
413 case PIPE_BLENDFACTOR_ONE:
414 VEC4_COPY(source[3], quadColor[3]); /* A */
415 break;
416 case PIPE_BLENDFACTOR_SRC_COLOR:
417 /* fall-through */
418 case PIPE_BLENDFACTOR_SRC_ALPHA:
419 {
420 const float *alpha = quadColor[3];
421 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
422 }
423 break;
424 case PIPE_BLENDFACTOR_DST_COLOR:
425 /* fall-through */
426 case PIPE_BLENDFACTOR_DST_ALPHA:
427 if (has_dst_alpha)
428 VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
429 else
430 VEC4_COPY(source[3], quadColor[3]); /* A */
431 break;
432 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
433 /* multiply alpha by 1.0 */
434 VEC4_COPY(source[3], quadColor[3]); /* A */
435 break;
436 case PIPE_BLENDFACTOR_CONST_COLOR:
437 /* fall-through */
438 case PIPE_BLENDFACTOR_CONST_ALPHA:
439 {
440 float comp[4];
441 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
442 VEC4_MUL(source[3], quadColor[3], comp); /* A */
443 }
444 break;
445 case PIPE_BLENDFACTOR_ZERO:
446 VEC4_COPY(source[3], zero); /* A */
447 break;
448 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
449 /* fall-through */
450 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
451 {
452 float inv_alpha[4];
453 VEC4_SUB(inv_alpha, one, quadColor[3]);
454 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
455 }
456 break;
457 case PIPE_BLENDFACTOR_INV_DST_COLOR:
458 /* fall-through */
459 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
460 if (has_dst_alpha) {
461 float inv_alpha[4];
462 VEC4_SUB(inv_alpha, one, dest[3]);
463 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
464 }
465 else {
466 VEC4_COPY(source[3], zero); /* A */
467 }
468 break;
469 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
470 /* fall-through */
471 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
472 {
473 float inv_comp[4];
474 /* A */
475 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
476 VEC4_MUL(source[3], quadColor[3], inv_comp);
477 }
478 break;
479 default:
480 assert(0 && "invalid alpha src factor");
481 }
482
483
484 /*
485 * Compute dest/second term RGB
486 */
487 switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
488 case PIPE_BLENDFACTOR_ONE:
489 /* dest = dest * 1 NO-OP, leave dest as-is */
490 break;
491 case PIPE_BLENDFACTOR_SRC_COLOR:
492 VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
493 VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
494 VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
495 break;
496 case PIPE_BLENDFACTOR_SRC_ALPHA:
497 VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
498 VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
499 VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
500 break;
501 case PIPE_BLENDFACTOR_DST_ALPHA:
502 if (has_dst_alpha) {
503 VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
504 VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
505 VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
506 }
507 else {
508 /* dest = dest * 1 NO-OP, leave dest as-is */
509 }
510 break;
511 case PIPE_BLENDFACTOR_DST_COLOR:
512 VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
513 VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
514 VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
515 break;
516 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
517 if (has_dst_alpha) {
518 const float *alpha = quadColor[3];
519 float diff[4], temp[4];
520 VEC4_SUB(diff, one, dest[3]);
521 VEC4_MIN(temp, alpha, diff);
522 VEC4_MUL(dest[0], quadColor[0], temp); /* R */
523 VEC4_MUL(dest[1], quadColor[1], temp); /* G */
524 VEC4_MUL(dest[2], quadColor[2], temp); /* B */
525 }
526 else {
527 VEC4_COPY(dest[0], zero); /* R */
528 VEC4_COPY(dest[1], zero); /* G */
529 VEC4_COPY(dest[2], zero); /* B */
530 }
531 break;
532 case PIPE_BLENDFACTOR_CONST_COLOR:
533 {
534 float comp[4];
535 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
536 VEC4_MUL(dest[0], dest[0], comp); /* R */
537 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
538 VEC4_MUL(dest[1], dest[1], comp); /* G */
539 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
540 VEC4_MUL(dest[2], dest[2], comp); /* B */
541 }
542 break;
543 case PIPE_BLENDFACTOR_CONST_ALPHA:
544 {
545 float comp[4];
546 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
547 VEC4_MUL(dest[0], dest[0], comp); /* R */
548 VEC4_MUL(dest[1], dest[1], comp); /* G */
549 VEC4_MUL(dest[2], dest[2], comp); /* B */
550 }
551 break;
552 case PIPE_BLENDFACTOR_ZERO:
553 VEC4_COPY(dest[0], zero); /* R */
554 VEC4_COPY(dest[1], zero); /* G */
555 VEC4_COPY(dest[2], zero); /* B */
556 break;
557 case PIPE_BLENDFACTOR_SRC1_COLOR:
558 case PIPE_BLENDFACTOR_SRC1_ALPHA:
559 /* XXX what are these? */
560 assert(0);
561 break;
562 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
563 {
564 float inv_comp[4];
565 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
566 VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
567 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
568 VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
569 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
570 VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
571 }
572 break;
573 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
574 {
575 float one_minus_alpha[QUAD_SIZE];
576 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
577 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
578 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
579 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
580 }
581 break;
582 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
583 if (has_dst_alpha) {
584 float inv_comp[4];
585 VEC4_SUB(inv_comp, one, dest[3]); /* A */
586 VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
587 VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
588 VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
589 }
590 else {
591 VEC4_COPY(dest[0], zero); /* R */
592 VEC4_COPY(dest[1], zero); /* G */
593 VEC4_COPY(dest[2], zero); /* B */
594 }
595 break;
596 case PIPE_BLENDFACTOR_INV_DST_COLOR:
597 {
598 float inv_comp[4];
599 VEC4_SUB(inv_comp, one, dest[0]); /* R */
600 VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
601 VEC4_SUB(inv_comp, one, dest[1]); /* G */
602 VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
603 VEC4_SUB(inv_comp, one, dest[2]); /* B */
604 VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
605 }
606 break;
607 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
608 {
609 float inv_comp[4];
610 /* R */
611 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
612 VEC4_MUL(dest[0], dest[0], inv_comp);
613 /* G */
614 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
615 VEC4_MUL(dest[1], dest[1], inv_comp);
616 /* B */
617 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
618 VEC4_MUL(dest[2], dest[2], inv_comp);
619 }
620 break;
621 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
622 {
623 float inv_comp[4];
624 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
625 VEC4_MUL(dest[0], dest[0], inv_comp);
626 VEC4_MUL(dest[1], dest[1], inv_comp);
627 VEC4_MUL(dest[2], dest[2], inv_comp);
628 }
629 break;
630 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
631 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
632 /* XXX what are these? */
633 assert(0);
634 break;
635 default:
636 assert(0 && "invalid rgb dst factor");
637 }
638
639 /*
640 * Compute dest/second term A
641 */
642 switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
643 case PIPE_BLENDFACTOR_ONE:
644 /* dest = dest * 1 NO-OP, leave dest as-is */
645 break;
646 case PIPE_BLENDFACTOR_SRC_COLOR:
647 /* fall-through */
648 case PIPE_BLENDFACTOR_SRC_ALPHA:
649 VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
650 break;
651 case PIPE_BLENDFACTOR_DST_COLOR:
652 /* fall-through */
653 case PIPE_BLENDFACTOR_DST_ALPHA:
654 if (has_dst_alpha) {
655 VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
656 }
657 else {
658 /* dest = dest * 1 NO-OP, leave dest as-is */
659 }
660 break;
661 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
662 /* dest = dest * 1 NO-OP, leave dest as-is */
663 break;
664 case PIPE_BLENDFACTOR_CONST_COLOR:
665 /* fall-through */
666 case PIPE_BLENDFACTOR_CONST_ALPHA:
667 {
668 float comp[4];
669 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
670 VEC4_MUL(dest[3], dest[3], comp); /* A */
671 }
672 break;
673 case PIPE_BLENDFACTOR_ZERO:
674 VEC4_COPY(dest[3], zero); /* A */
675 break;
676 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
677 /* fall-through */
678 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
679 {
680 float one_minus_alpha[QUAD_SIZE];
681 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
682 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
683 }
684 break;
685 case PIPE_BLENDFACTOR_INV_DST_COLOR:
686 /* fall-through */
687 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
688 if (has_dst_alpha) {
689 float inv_comp[4];
690 VEC4_SUB(inv_comp, one, dest[3]); /* A */
691 VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
692 }
693 else {
694 VEC4_COPY(dest[3], zero); /* A */
695 }
696 break;
697 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
698 /* fall-through */
699 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
700 {
701 float inv_comp[4];
702 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
703 VEC4_MUL(dest[3], dest[3], inv_comp);
704 }
705 break;
706 default:
707 assert(0 && "invalid alpha dst factor");
708 }
709
710 /*
711 * Combine RGB terms
712 */
713 switch (softpipe->blend->rt[blend_index].rgb_func) {
714 case PIPE_BLEND_ADD:
715 VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
716 VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
717 VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
718 break;
719 case PIPE_BLEND_SUBTRACT:
720 VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
721 VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
722 VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
723 break;
724 case PIPE_BLEND_REVERSE_SUBTRACT:
725 VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
726 VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
727 VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
728 break;
729 case PIPE_BLEND_MIN:
730 VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
731 VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
732 VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
733 break;
734 case PIPE_BLEND_MAX:
735 VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
736 VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
737 VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
738 break;
739 default:
740 assert(0 && "invalid rgb blend func");
741 }
742
743 /*
744 * Combine A terms
745 */
746 switch (softpipe->blend->rt[blend_index].alpha_func) {
747 case PIPE_BLEND_ADD:
748 VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
749 break;
750 case PIPE_BLEND_SUBTRACT:
751 VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
752 break;
753 case PIPE_BLEND_REVERSE_SUBTRACT:
754 VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
755 break;
756 case PIPE_BLEND_MIN:
757 VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
758 break;
759 case PIPE_BLEND_MAX:
760 VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
761 break;
762 default:
763 assert(0 && "invalid alpha blend func");
764 }
765 }
766
767 static void
768 colormask_quad(unsigned colormask,
769 float (*quadColor)[4],
770 float (*dest)[4])
771 {
772 /* R */
773 if (!(colormask & PIPE_MASK_R))
774 COPY_4V(quadColor[0], dest[0]);
775
776 /* G */
777 if (!(colormask & PIPE_MASK_G))
778 COPY_4V(quadColor[1], dest[1]);
779
780 /* B */
781 if (!(colormask & PIPE_MASK_B))
782 COPY_4V(quadColor[2], dest[2]);
783
784 /* A */
785 if (!(colormask & PIPE_MASK_A))
786 COPY_4V(quadColor[3], dest[3]);
787 }
788
789
790 static void
791 blend_fallback(struct quad_stage *qs,
792 struct quad_header *quads[],
793 unsigned nr)
794 {
795 struct softpipe_context *softpipe = qs->softpipe;
796 const struct pipe_blend_state *blend = softpipe->blend;
797 unsigned cbuf;
798 boolean write_all;
799
800 write_all = softpipe->fs->color0_writes_all_cbufs;
801
802 for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
803 {
804 /* which blend/mask state index to use: */
805 const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
806 float dest[4][QUAD_SIZE];
807 struct softpipe_cached_tile *tile
808 = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
809 quads[0]->input.x0,
810 quads[0]->input.y0);
811 boolean has_dst_alpha
812 = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
813 uint q, i, j, qbuf;
814
815 qbuf = write_all ? 0 : cbuf;
816
817 for (q = 0; q < nr; q++) {
818 struct quad_header *quad = quads[q];
819 float (*quadColor)[4];
820 const int itx = (quad->input.x0 & (TILE_SIZE-1));
821 const int ity = (quad->input.y0 & (TILE_SIZE-1));
822
823 quadColor = quad->output.color[qbuf];
824
825 /* get/swizzle dest colors
826 */
827 for (j = 0; j < QUAD_SIZE; j++) {
828 int x = itx + (j & 1);
829 int y = ity + (j >> 1);
830 for (i = 0; i < 4; i++) {
831 dest[i][j] = tile->data.color[y][x][i];
832 }
833 }
834
835
836 if (blend->logicop_enable) {
837 logicop_quad( qs, quadColor, dest );
838 }
839 else if (blend->rt[blend_buf].blend_enable) {
840 blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha );
841 }
842
843 if (blend->rt[blend_buf].colormask != 0xf)
844 colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
845
846 /* Output color values
847 */
848 for (j = 0; j < QUAD_SIZE; j++) {
849 if (quad->inout.mask & (1 << j)) {
850 int x = itx + (j & 1);
851 int y = ity + (j >> 1);
852 for (i = 0; i < 4; i++) { /* loop over color chans */
853 tile->data.color[y][x][i] = quadColor[i][j];
854 }
855 }
856 }
857 }
858 }
859 }
860
861
862 static void
863 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
864 struct quad_header *quads[],
865 unsigned nr)
866 {
867 static const float one[4] = { 1, 1, 1, 1 };
868 float one_minus_alpha[QUAD_SIZE];
869 float dest[4][QUAD_SIZE];
870 float source[4][QUAD_SIZE];
871 uint i, j, q;
872
873 struct softpipe_cached_tile *tile
874 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
875 quads[0]->input.x0,
876 quads[0]->input.y0);
877
878 for (q = 0; q < nr; q++) {
879 struct quad_header *quad = quads[q];
880 float (*quadColor)[4] = quad->output.color[0];
881 const float *alpha = quadColor[3];
882 const int itx = (quad->input.x0 & (TILE_SIZE-1));
883 const int ity = (quad->input.y0 & (TILE_SIZE-1));
884
885 /* get/swizzle dest colors */
886 for (j = 0; j < QUAD_SIZE; j++) {
887 int x = itx + (j & 1);
888 int y = ity + (j >> 1);
889 for (i = 0; i < 4; i++) {
890 dest[i][j] = tile->data.color[y][x][i];
891 }
892 }
893
894 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
895 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
896 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
897 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
898
899 VEC4_SUB(one_minus_alpha, one, alpha);
900 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
901 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
902 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
903 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
904
905 VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
906 VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
907 VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
908 VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
909
910 for (j = 0; j < QUAD_SIZE; j++) {
911 if (quad->inout.mask & (1 << j)) {
912 int x = itx + (j & 1);
913 int y = ity + (j >> 1);
914 for (i = 0; i < 4; i++) { /* loop over color chans */
915 tile->data.color[y][x][i] = quadColor[i][j];
916 }
917 }
918 }
919 }
920 }
921
922 static void
923 blend_single_add_one_one(struct quad_stage *qs,
924 struct quad_header *quads[],
925 unsigned nr)
926 {
927 float dest[4][QUAD_SIZE];
928 uint i, j, q;
929
930 struct softpipe_cached_tile *tile
931 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
932 quads[0]->input.x0,
933 quads[0]->input.y0);
934
935 for (q = 0; q < nr; q++) {
936 struct quad_header *quad = quads[q];
937 float (*quadColor)[4] = quad->output.color[0];
938 const int itx = (quad->input.x0 & (TILE_SIZE-1));
939 const int ity = (quad->input.y0 & (TILE_SIZE-1));
940
941 /* get/swizzle dest colors */
942 for (j = 0; j < QUAD_SIZE; j++) {
943 int x = itx + (j & 1);
944 int y = ity + (j >> 1);
945 for (i = 0; i < 4; i++) {
946 dest[i][j] = tile->data.color[y][x][i];
947 }
948 }
949
950 VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
951 VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
952 VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
953 VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
954
955 for (j = 0; j < QUAD_SIZE; j++) {
956 if (quad->inout.mask & (1 << j)) {
957 int x = itx + (j & 1);
958 int y = ity + (j >> 1);
959 for (i = 0; i < 4; i++) { /* loop over color chans */
960 tile->data.color[y][x][i] = quadColor[i][j];
961 }
962 }
963 }
964 }
965 }
966
967
968 static void
969 single_output_color(struct quad_stage *qs,
970 struct quad_header *quads[],
971 unsigned nr)
972 {
973 uint i, j, q;
974
975 struct softpipe_cached_tile *tile
976 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
977 quads[0]->input.x0,
978 quads[0]->input.y0);
979
980 for (q = 0; q < nr; q++) {
981 struct quad_header *quad = quads[q];
982 float (*quadColor)[4] = quad->output.color[0];
983 const int itx = (quad->input.x0 & (TILE_SIZE-1));
984 const int ity = (quad->input.y0 & (TILE_SIZE-1));
985
986 for (j = 0; j < QUAD_SIZE; j++) {
987 if (quad->inout.mask & (1 << j)) {
988 int x = itx + (j & 1);
989 int y = ity + (j >> 1);
990 for (i = 0; i < 4; i++) { /* loop over color chans */
991 tile->data.color[y][x][i] = quadColor[i][j];
992 }
993 }
994 }
995 }
996 }
997
/**
 * Run callback that does nothing.  choose_blend_quad() selects it when the
 * framebuffer has no color buffers.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty */
   (void) qs;
   (void) quads;
   (void) nr;
}
1004
1005
1006 static void
1007 choose_blend_quad(struct quad_stage *qs,
1008 struct quad_header *quads[],
1009 unsigned nr)
1010 {
1011 struct softpipe_context *softpipe = qs->softpipe;
1012 const struct pipe_blend_state *blend = softpipe->blend;
1013
1014 qs->run = blend_fallback;
1015
1016 if (softpipe->framebuffer.nr_cbufs == 0) {
1017 qs->run = blend_noop;
1018 }
1019 else if (!softpipe->blend->logicop_enable &&
1020 softpipe->blend->rt[0].colormask == 0xf &&
1021 softpipe->framebuffer.nr_cbufs == 1)
1022 {
1023 if (!blend->rt[0].blend_enable) {
1024 qs->run = single_output_color;
1025 }
1026 else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1027 blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1028 blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1029 {
1030 if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1031 if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1032 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1033 qs->run = blend_single_add_one_one;
1034 }
1035 else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1036 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1037 qs->run = blend_single_add_src_alpha_inv_src_alpha;
1038
1039 }
1040 }
1041 }
1042
1043 qs->run(qs, quads, nr);
1044 }
1045
1046
1047 static void blend_begin(struct quad_stage *qs)
1048 {
1049 qs->run = choose_blend_quad;
1050 }
1051
1052
/**
 * Destroy callback: release the quad_stage object itself.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1057
1058
1059 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1060 {
1061 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1062
1063 stage->softpipe = softpipe;
1064 stage->begin = blend_begin;
1065 stage->run = choose_blend_quad;
1066 stage->destroy = blend_destroy;
1067
1068 return stage;
1069 }