82f9785e32a45dcde19ac847943442f9ebfa4a5f
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * quad blending
30 * \author Brian Paul
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/u_format.h"
37 #include "sp_context.h"
38 #include "sp_state.h"
39 #include "sp_quad.h"
40 #include "sp_tile_cache.h"
41 #include "sp_quad_pipe.h"
42
43
/**
 * Copy the four elements of SRC into DST.
 * Arguments are parenthesized so that pointer expressions (e.g. "buf + 4")
 * can be passed safely.
 */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while(0)
51
/**
 * Set all four elements of DST to the scalar SRC.
 * SRC is evaluated exactly once, so expressions with side effects or
 * non-trivial cost behave as expected.
 */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (DST)[1] = (DST)[2] = (DST)[3] = (SRC); \
} while(0)
59
/** Element-wise R = A + B for 4-element vectors (no clamping). */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)
67
/** Element-wise R = A - B for 4-element vectors (no clamping). */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)
75
/**
 * Element-wise R = A + B, with each result limited to a ceiling of 1.0.
 * Only the upper bound is enforced; results below 0.0 pass through.
 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1]; if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2]; if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3]; if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)
84
/**
 * Element-wise R = A - B, with each result limited to a floor of 0.0.
 * Only the lower bound is enforced; results above 1.0 pass through.
 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1]; if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2]; if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3]; if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)
93
/** Element-wise R = A * B for 4-element vectors. R may alias A or B. */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)
101
/** Element-wise R = min(A, B) for 4-element vectors. */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
109
/** Element-wise R = max(A, B) for 4-element vectors. */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
117
118
119
120 static void
121 logicop_quad(struct quad_stage *qs,
122 float (*quadColor)[4],
123 float (*dest)[4])
124 {
125 struct softpipe_context *softpipe = qs->softpipe;
126 ubyte src[4][4], dst[4][4], res[4][4];
127 uint *src4 = (uint *) src;
128 uint *dst4 = (uint *) dst;
129 uint *res4 = (uint *) res;
130 uint j;
131
132
133 /* convert to ubyte */
134 for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
135 dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
136 dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
137 dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
138 dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
139
140 src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
141 src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
142 src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
143 src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
144 }
145
146 switch (softpipe->blend->logicop_func) {
147 case PIPE_LOGICOP_CLEAR:
148 for (j = 0; j < 4; j++)
149 res4[j] = 0;
150 break;
151 case PIPE_LOGICOP_NOR:
152 for (j = 0; j < 4; j++)
153 res4[j] = ~(src4[j] | dst4[j]);
154 break;
155 case PIPE_LOGICOP_AND_INVERTED:
156 for (j = 0; j < 4; j++)
157 res4[j] = ~src4[j] & dst4[j];
158 break;
159 case PIPE_LOGICOP_COPY_INVERTED:
160 for (j = 0; j < 4; j++)
161 res4[j] = ~src4[j];
162 break;
163 case PIPE_LOGICOP_AND_REVERSE:
164 for (j = 0; j < 4; j++)
165 res4[j] = src4[j] & ~dst4[j];
166 break;
167 case PIPE_LOGICOP_INVERT:
168 for (j = 0; j < 4; j++)
169 res4[j] = ~dst4[j];
170 break;
171 case PIPE_LOGICOP_XOR:
172 for (j = 0; j < 4; j++)
173 res4[j] = dst4[j] ^ src4[j];
174 break;
175 case PIPE_LOGICOP_NAND:
176 for (j = 0; j < 4; j++)
177 res4[j] = ~(src4[j] & dst4[j]);
178 break;
179 case PIPE_LOGICOP_AND:
180 for (j = 0; j < 4; j++)
181 res4[j] = src4[j] & dst4[j];
182 break;
183 case PIPE_LOGICOP_EQUIV:
184 for (j = 0; j < 4; j++)
185 res4[j] = ~(src4[j] ^ dst4[j]);
186 break;
187 case PIPE_LOGICOP_NOOP:
188 for (j = 0; j < 4; j++)
189 res4[j] = dst4[j];
190 break;
191 case PIPE_LOGICOP_OR_INVERTED:
192 for (j = 0; j < 4; j++)
193 res4[j] = ~src4[j] | dst4[j];
194 break;
195 case PIPE_LOGICOP_COPY:
196 for (j = 0; j < 4; j++)
197 res4[j] = src4[j];
198 break;
199 case PIPE_LOGICOP_OR_REVERSE:
200 for (j = 0; j < 4; j++)
201 res4[j] = src4[j] | ~dst4[j];
202 break;
203 case PIPE_LOGICOP_OR:
204 for (j = 0; j < 4; j++)
205 res4[j] = src4[j] | dst4[j];
206 break;
207 case PIPE_LOGICOP_SET:
208 for (j = 0; j < 4; j++)
209 res4[j] = ~0;
210 break;
211 default:
212 assert(0 && "invalid logicop mode");
213 }
214
215 for (j = 0; j < 4; j++) {
216 quadColor[j][0] = ubyte_to_float(res[j][0]);
217 quadColor[j][1] = ubyte_to_float(res[j][1]);
218 quadColor[j][2] = ubyte_to_float(res[j][2]);
219 quadColor[j][3] = ubyte_to_float(res[j][3]);
220 }
221 }
222
223
224
225 /**
226 * Do blending for a 2x2 quad for one color buffer.
227 * \param quadColor the incoming quad colors
228 * \param dest the destination/framebuffer quad colors
229 * \param blend_index which set of blending terms to use
230 * \param has_dst_alpha does the dest color buffer have an alpha channel?
231 */
232 static void
233 blend_quad(struct quad_stage *qs,
234 float (*quadColor)[4],
235 float (*dest)[4],
236 unsigned blend_index,
237 boolean has_dst_alpha)
238 {
239 static const float zero[4] = { 0, 0, 0, 0 };
240 static const float one[4] = { 1, 1, 1, 1 };
241 struct softpipe_context *softpipe = qs->softpipe;
242 float source[4][QUAD_SIZE] = { { 0 } };
243 float blend_dest[4][QUAD_SIZE];
244
245 /*
246 * Compute src/first term RGB
247 */
248 switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
249 case PIPE_BLENDFACTOR_ONE:
250 VEC4_COPY(source[0], quadColor[0]); /* R */
251 VEC4_COPY(source[1], quadColor[1]); /* G */
252 VEC4_COPY(source[2], quadColor[2]); /* B */
253 break;
254 case PIPE_BLENDFACTOR_SRC_COLOR:
255 VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
256 VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
257 VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
258 break;
259 case PIPE_BLENDFACTOR_SRC_ALPHA:
260 {
261 const float *alpha = quadColor[3];
262 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
263 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
264 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
265 }
266 break;
267 case PIPE_BLENDFACTOR_DST_COLOR:
268 VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
269 VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
270 VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
271 break;
272 case PIPE_BLENDFACTOR_DST_ALPHA:
273 if (has_dst_alpha) {
274 const float *alpha = dest[3];
275 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
276 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
277 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
278 }
279 else {
280 VEC4_COPY(source[0], quadColor[0]); /* R */
281 VEC4_COPY(source[1], quadColor[1]); /* G */
282 VEC4_COPY(source[2], quadColor[2]); /* B */
283 }
284 break;
285 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
286 if (has_dst_alpha) {
287 const float *alpha = quadColor[3];
288 float diff[4], temp[4];
289 VEC4_SUB(diff, one, dest[3]);
290 VEC4_MIN(temp, alpha, diff);
291 VEC4_MUL(source[0], quadColor[0], temp); /* R */
292 VEC4_MUL(source[1], quadColor[1], temp); /* G */
293 VEC4_MUL(source[2], quadColor[2], temp); /* B */
294 }
295 else {
296 VEC4_COPY(source[0], zero); /* R */
297 VEC4_COPY(source[1], zero); /* G */
298 VEC4_COPY(source[2], zero); /* B */
299 }
300 break;
301 case PIPE_BLENDFACTOR_CONST_COLOR:
302 {
303 float comp[4];
304 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
305 VEC4_MUL(source[0], quadColor[0], comp); /* R */
306 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
307 VEC4_MUL(source[1], quadColor[1], comp); /* G */
308 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
309 VEC4_MUL(source[2], quadColor[2], comp); /* B */
310 }
311 break;
312 case PIPE_BLENDFACTOR_CONST_ALPHA:
313 {
314 float alpha[4];
315 VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
316 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
317 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
318 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
319 }
320 break;
321 case PIPE_BLENDFACTOR_SRC1_COLOR:
322 assert(0); /* to do */
323 break;
324 case PIPE_BLENDFACTOR_SRC1_ALPHA:
325 assert(0); /* to do */
326 break;
327 case PIPE_BLENDFACTOR_ZERO:
328 VEC4_COPY(source[0], zero); /* R */
329 VEC4_COPY(source[1], zero); /* G */
330 VEC4_COPY(source[2], zero); /* B */
331 break;
332 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
333 {
334 float inv_comp[4];
335 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
336 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
337 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
338 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
339 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
340 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
341 }
342 break;
343 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
344 {
345 float inv_alpha[4];
346 VEC4_SUB(inv_alpha, one, quadColor[3]);
347 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
348 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
349 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
350 }
351 break;
352 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
353 if (has_dst_alpha) {
354 float inv_alpha[4];
355 VEC4_SUB(inv_alpha, one, dest[3]);
356 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
357 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
358 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
359 }
360 else {
361 VEC4_COPY(source[0], zero); /* R */
362 VEC4_COPY(source[1], zero); /* G */
363 VEC4_COPY(source[2], zero); /* B */
364 }
365 break;
366 case PIPE_BLENDFACTOR_INV_DST_COLOR:
367 {
368 float inv_comp[4];
369 VEC4_SUB(inv_comp, one, dest[0]); /* R */
370 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
371 VEC4_SUB(inv_comp, one, dest[1]); /* G */
372 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
373 VEC4_SUB(inv_comp, one, dest[2]); /* B */
374 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
375 }
376 break;
377 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
378 {
379 float inv_comp[4];
380 /* R */
381 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
382 VEC4_MUL(source[0], quadColor[0], inv_comp);
383 /* G */
384 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
385 VEC4_MUL(source[1], quadColor[1], inv_comp);
386 /* B */
387 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
388 VEC4_MUL(source[2], quadColor[2], inv_comp);
389 }
390 break;
391 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
392 {
393 float inv_alpha[4];
394 VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
395 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
396 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
397 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
398 }
399 break;
400 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
401 assert(0); /* to do */
402 break;
403 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
404 assert(0); /* to do */
405 break;
406 default:
407 assert(0 && "invalid rgb src factor");
408 }
409
410 /*
411 * Compute src/first term A
412 */
413 switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
414 case PIPE_BLENDFACTOR_ONE:
415 VEC4_COPY(source[3], quadColor[3]); /* A */
416 break;
417 case PIPE_BLENDFACTOR_SRC_COLOR:
418 /* fall-through */
419 case PIPE_BLENDFACTOR_SRC_ALPHA:
420 {
421 const float *alpha = quadColor[3];
422 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
423 }
424 break;
425 case PIPE_BLENDFACTOR_DST_COLOR:
426 /* fall-through */
427 case PIPE_BLENDFACTOR_DST_ALPHA:
428 if (has_dst_alpha)
429 VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
430 else
431 VEC4_COPY(source[3], quadColor[3]); /* A */
432 break;
433 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
434 /* multiply alpha by 1.0 */
435 VEC4_COPY(source[3], quadColor[3]); /* A */
436 break;
437 case PIPE_BLENDFACTOR_CONST_COLOR:
438 /* fall-through */
439 case PIPE_BLENDFACTOR_CONST_ALPHA:
440 {
441 float comp[4];
442 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
443 VEC4_MUL(source[3], quadColor[3], comp); /* A */
444 }
445 break;
446 case PIPE_BLENDFACTOR_ZERO:
447 VEC4_COPY(source[3], zero); /* A */
448 break;
449 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
450 /* fall-through */
451 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
452 {
453 float inv_alpha[4];
454 VEC4_SUB(inv_alpha, one, quadColor[3]);
455 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
456 }
457 break;
458 case PIPE_BLENDFACTOR_INV_DST_COLOR:
459 /* fall-through */
460 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
461 if (has_dst_alpha) {
462 float inv_alpha[4];
463 VEC4_SUB(inv_alpha, one, dest[3]);
464 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
465 }
466 else {
467 VEC4_COPY(source[3], zero); /* A */
468 }
469 break;
470 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
471 /* fall-through */
472 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
473 {
474 float inv_comp[4];
475 /* A */
476 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
477 VEC4_MUL(source[3], quadColor[3], inv_comp);
478 }
479 break;
480 default:
481 assert(0 && "invalid alpha src factor");
482 }
483
484 /* Save the original dest for use in masking */
485 VEC4_COPY(blend_dest[0], dest[0]);
486 VEC4_COPY(blend_dest[1], dest[1]);
487 VEC4_COPY(blend_dest[2], dest[2]);
488 VEC4_COPY(blend_dest[3], dest[3]);
489
490
491 /*
492 * Compute blend_dest/second term RGB
493 */
494 switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
495 case PIPE_BLENDFACTOR_ONE:
496 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
497 break;
498 case PIPE_BLENDFACTOR_SRC_COLOR:
499 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
500 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
501 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
502 break;
503 case PIPE_BLENDFACTOR_SRC_ALPHA:
504 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
505 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
506 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
507 break;
508 case PIPE_BLENDFACTOR_DST_ALPHA:
509 if (has_dst_alpha) {
510 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
511 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
512 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
513 }
514 else {
515 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
516 }
517 break;
518 case PIPE_BLENDFACTOR_DST_COLOR:
519 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
520 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
521 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
522 break;
523 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
524 if (has_dst_alpha) {
525 const float *alpha = quadColor[3];
526 float diff[4], temp[4];
527 VEC4_SUB(diff, one, blend_dest[3]);
528 VEC4_MIN(temp, alpha, diff);
529 VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
530 VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
531 VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
532 }
533 else {
534 VEC4_COPY(blend_dest[0], zero); /* R */
535 VEC4_COPY(blend_dest[1], zero); /* G */
536 VEC4_COPY(blend_dest[2], zero); /* B */
537 }
538 break;
539 case PIPE_BLENDFACTOR_CONST_COLOR:
540 {
541 float comp[4];
542 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
543 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
544 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
545 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
546 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
547 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
548 }
549 break;
550 case PIPE_BLENDFACTOR_CONST_ALPHA:
551 {
552 float comp[4];
553 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
554 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
555 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
556 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
557 }
558 break;
559 case PIPE_BLENDFACTOR_ZERO:
560 VEC4_COPY(blend_dest[0], zero); /* R */
561 VEC4_COPY(blend_dest[1], zero); /* G */
562 VEC4_COPY(blend_dest[2], zero); /* B */
563 break;
564 case PIPE_BLENDFACTOR_SRC1_COLOR:
565 case PIPE_BLENDFACTOR_SRC1_ALPHA:
566 /* XXX what are these? */
567 assert(0);
568 break;
569 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
570 {
571 float inv_comp[4];
572 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
573 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
574 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
575 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
576 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
577 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
578 }
579 break;
580 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
581 {
582 float one_minus_alpha[QUAD_SIZE];
583 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
584 VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
585 VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
586 VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
587 }
588 break;
589 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
590 if (has_dst_alpha) {
591 float inv_comp[4];
592 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
593 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
594 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
595 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
596 }
597 else {
598 VEC4_COPY(blend_dest[0], zero); /* R */
599 VEC4_COPY(blend_dest[1], zero); /* G */
600 VEC4_COPY(blend_dest[2], zero); /* B */
601 }
602 break;
603 case PIPE_BLENDFACTOR_INV_DST_COLOR:
604 {
605 float inv_comp[4];
606 VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
607 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
608 VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
609 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
610 VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
611 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
612 }
613 break;
614 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
615 {
616 float inv_comp[4];
617 /* R */
618 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
619 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
620 /* G */
621 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
622 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
623 /* B */
624 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
625 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
626 }
627 break;
628 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
629 {
630 float inv_comp[4];
631 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
632 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
633 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
634 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
635 }
636 break;
637 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
638 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
639 /* XXX what are these? */
640 assert(0);
641 break;
642 default:
643 assert(0 && "invalid rgb dst factor");
644 }
645
646 /*
647 * Compute blend_dest/second term A
648 */
649 switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
650 case PIPE_BLENDFACTOR_ONE:
651 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
652 break;
653 case PIPE_BLENDFACTOR_SRC_COLOR:
654 /* fall-through */
655 case PIPE_BLENDFACTOR_SRC_ALPHA:
656 VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
657 break;
658 case PIPE_BLENDFACTOR_DST_COLOR:
659 /* fall-through */
660 case PIPE_BLENDFACTOR_DST_ALPHA:
661 if (has_dst_alpha) {
662 VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
663 }
664 else {
665 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
666 }
667 break;
668 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
669 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
670 break;
671 case PIPE_BLENDFACTOR_CONST_COLOR:
672 /* fall-through */
673 case PIPE_BLENDFACTOR_CONST_ALPHA:
674 {
675 float comp[4];
676 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
677 VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
678 }
679 break;
680 case PIPE_BLENDFACTOR_ZERO:
681 VEC4_COPY(blend_dest[3], zero); /* A */
682 break;
683 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
684 /* fall-through */
685 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
686 {
687 float one_minus_alpha[QUAD_SIZE];
688 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
689 VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
690 }
691 break;
692 case PIPE_BLENDFACTOR_INV_DST_COLOR:
693 /* fall-through */
694 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
695 if (has_dst_alpha) {
696 float inv_comp[4];
697 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
698 VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
699 }
700 else {
701 VEC4_COPY(blend_dest[3], zero); /* A */
702 }
703 break;
704 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
705 /* fall-through */
706 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
707 {
708 float inv_comp[4];
709 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
710 VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
711 }
712 break;
713 default:
714 assert(0 && "invalid alpha dst factor");
715 }
716
717 /*
718 * Combine RGB terms
719 */
720 switch (softpipe->blend->rt[blend_index].rgb_func) {
721 case PIPE_BLEND_ADD:
722 VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
723 VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
724 VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
725 break;
726 case PIPE_BLEND_SUBTRACT:
727 VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
728 VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
729 VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
730 break;
731 case PIPE_BLEND_REVERSE_SUBTRACT:
732 VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */
733 VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */
734 VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */
735 break;
736 case PIPE_BLEND_MIN:
737 VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
738 VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
739 VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
740 break;
741 case PIPE_BLEND_MAX:
742 VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
743 VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
744 VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
745 break;
746 default:
747 assert(0 && "invalid rgb blend func");
748 }
749
750 /*
751 * Combine A terms
752 */
753 switch (softpipe->blend->rt[blend_index].alpha_func) {
754 case PIPE_BLEND_ADD:
755 VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
756 break;
757 case PIPE_BLEND_SUBTRACT:
758 VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
759 break;
760 case PIPE_BLEND_REVERSE_SUBTRACT:
761 VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */
762 break;
763 case PIPE_BLEND_MIN:
764 VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
765 break;
766 case PIPE_BLEND_MAX:
767 VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
768 break;
769 default:
770 assert(0 && "invalid alpha blend func");
771 }
772 }
773
774 static void
775 colormask_quad(unsigned colormask,
776 float (*quadColor)[4],
777 float (*dest)[4])
778 {
779 /* R */
780 if (!(colormask & PIPE_MASK_R))
781 COPY_4V(quadColor[0], dest[0]);
782
783 /* G */
784 if (!(colormask & PIPE_MASK_G))
785 COPY_4V(quadColor[1], dest[1]);
786
787 /* B */
788 if (!(colormask & PIPE_MASK_B))
789 COPY_4V(quadColor[2], dest[2]);
790
791 /* A */
792 if (!(colormask & PIPE_MASK_A))
793 COPY_4V(quadColor[3], dest[3]);
794 }
795
796
797 static void
798 blend_fallback(struct quad_stage *qs,
799 struct quad_header *quads[],
800 unsigned nr)
801 {
802 struct softpipe_context *softpipe = qs->softpipe;
803 const struct pipe_blend_state *blend = softpipe->blend;
804 unsigned cbuf;
805 boolean write_all;
806
807 write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
808
809 for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
810 {
811 /* which blend/mask state index to use: */
812 const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
813 float dest[4][QUAD_SIZE];
814 struct softpipe_cached_tile *tile
815 = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
816 quads[0]->input.x0,
817 quads[0]->input.y0);
818 boolean has_dst_alpha
819 = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
820 uint q, i, j, qbuf;
821
822 qbuf = write_all ? 0 : cbuf;
823
824 for (q = 0; q < nr; q++) {
825 struct quad_header *quad = quads[q];
826 float (*quadColor)[4];
827 const int itx = (quad->input.x0 & (TILE_SIZE-1));
828 const int ity = (quad->input.y0 & (TILE_SIZE-1));
829
830 quadColor = quad->output.color[qbuf];
831
832 /* get/swizzle dest colors
833 */
834 for (j = 0; j < QUAD_SIZE; j++) {
835 int x = itx + (j & 1);
836 int y = ity + (j >> 1);
837 for (i = 0; i < 4; i++) {
838 dest[i][j] = tile->data.color[y][x][i];
839 }
840 }
841
842
843 if (blend->logicop_enable) {
844 logicop_quad( qs, quadColor, dest );
845 }
846 else if (blend->rt[blend_buf].blend_enable) {
847 blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha );
848 }
849
850 if (blend->rt[blend_buf].colormask != 0xf)
851 colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
852
853 /* Output color values
854 */
855 for (j = 0; j < QUAD_SIZE; j++) {
856 if (quad->inout.mask & (1 << j)) {
857 int x = itx + (j & 1);
858 int y = ity + (j >> 1);
859 for (i = 0; i < 4; i++) { /* loop over color chans */
860 tile->data.color[y][x][i] = quadColor[i][j];
861 }
862 }
863 }
864 }
865 }
866 }
867
868
869 static void
870 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
871 struct quad_header *quads[],
872 unsigned nr)
873 {
874 static const float one[4] = { 1, 1, 1, 1 };
875 float one_minus_alpha[QUAD_SIZE];
876 float dest[4][QUAD_SIZE];
877 float source[4][QUAD_SIZE];
878 uint i, j, q;
879
880 struct softpipe_cached_tile *tile
881 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
882 quads[0]->input.x0,
883 quads[0]->input.y0);
884
885 for (q = 0; q < nr; q++) {
886 struct quad_header *quad = quads[q];
887 float (*quadColor)[4] = quad->output.color[0];
888 const float *alpha = quadColor[3];
889 const int itx = (quad->input.x0 & (TILE_SIZE-1));
890 const int ity = (quad->input.y0 & (TILE_SIZE-1));
891
892 /* get/swizzle dest colors */
893 for (j = 0; j < QUAD_SIZE; j++) {
894 int x = itx + (j & 1);
895 int y = ity + (j >> 1);
896 for (i = 0; i < 4; i++) {
897 dest[i][j] = tile->data.color[y][x][i];
898 }
899 }
900
901 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
902 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
903 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
904 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
905
906 VEC4_SUB(one_minus_alpha, one, alpha);
907 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
908 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
909 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
910 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
911
912 VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
913 VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
914 VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
915 VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
916
917 for (j = 0; j < QUAD_SIZE; j++) {
918 if (quad->inout.mask & (1 << j)) {
919 int x = itx + (j & 1);
920 int y = ity + (j >> 1);
921 for (i = 0; i < 4; i++) { /* loop over color chans */
922 tile->data.color[y][x][i] = quadColor[i][j];
923 }
924 }
925 }
926 }
927 }
928
929 static void
930 blend_single_add_one_one(struct quad_stage *qs,
931 struct quad_header *quads[],
932 unsigned nr)
933 {
934 float dest[4][QUAD_SIZE];
935 uint i, j, q;
936
937 struct softpipe_cached_tile *tile
938 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
939 quads[0]->input.x0,
940 quads[0]->input.y0);
941
942 for (q = 0; q < nr; q++) {
943 struct quad_header *quad = quads[q];
944 float (*quadColor)[4] = quad->output.color[0];
945 const int itx = (quad->input.x0 & (TILE_SIZE-1));
946 const int ity = (quad->input.y0 & (TILE_SIZE-1));
947
948 /* get/swizzle dest colors */
949 for (j = 0; j < QUAD_SIZE; j++) {
950 int x = itx + (j & 1);
951 int y = ity + (j >> 1);
952 for (i = 0; i < 4; i++) {
953 dest[i][j] = tile->data.color[y][x][i];
954 }
955 }
956
957 VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
958 VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
959 VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
960 VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
961
962 for (j = 0; j < QUAD_SIZE; j++) {
963 if (quad->inout.mask & (1 << j)) {
964 int x = itx + (j & 1);
965 int y = ity + (j >> 1);
966 for (i = 0; i < 4; i++) { /* loop over color chans */
967 tile->data.color[y][x][i] = quadColor[i][j];
968 }
969 }
970 }
971 }
972 }
973
974
975 static void
976 single_output_color(struct quad_stage *qs,
977 struct quad_header *quads[],
978 unsigned nr)
979 {
980 uint i, j, q;
981
982 struct softpipe_cached_tile *tile
983 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
984 quads[0]->input.x0,
985 quads[0]->input.y0);
986
987 for (q = 0; q < nr; q++) {
988 struct quad_header *quad = quads[q];
989 float (*quadColor)[4] = quad->output.color[0];
990 const int itx = (quad->input.x0 & (TILE_SIZE-1));
991 const int ity = (quad->input.y0 & (TILE_SIZE-1));
992
993 for (j = 0; j < QUAD_SIZE; j++) {
994 if (quad->inout.mask & (1 << j)) {
995 int x = itx + (j & 1);
996 int y = ity + (j >> 1);
997 for (i = 0; i < 4; i++) { /* loop over color chans */
998 tile->data.color[y][x][i] = quadColor[i][j];
999 }
1000 }
1001 }
1002 }
1003 }
1004
/**
 * Do-nothing blend path; choose_blend_quad() selects it when no color
 * buffers are bound.  All parameters are ignored.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty */
   (void) qs;
   (void) quads;
   (void) nr;
}
1011
1012
1013 static void
1014 choose_blend_quad(struct quad_stage *qs,
1015 struct quad_header *quads[],
1016 unsigned nr)
1017 {
1018 struct softpipe_context *softpipe = qs->softpipe;
1019 const struct pipe_blend_state *blend = softpipe->blend;
1020
1021 qs->run = blend_fallback;
1022
1023 if (softpipe->framebuffer.nr_cbufs == 0) {
1024 qs->run = blend_noop;
1025 }
1026 else if (!softpipe->blend->logicop_enable &&
1027 softpipe->blend->rt[0].colormask == 0xf &&
1028 softpipe->framebuffer.nr_cbufs == 1)
1029 {
1030 if (!blend->rt[0].blend_enable) {
1031 qs->run = single_output_color;
1032 }
1033 else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1034 blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1035 blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1036 {
1037 if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1038 if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1039 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1040 qs->run = blend_single_add_one_one;
1041 }
1042 else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1043 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1044 qs->run = blend_single_add_src_alpha_inv_src_alpha;
1045
1046 }
1047 }
1048 }
1049
1050 qs->run(qs, quads, nr);
1051 }
1052
1053
1054 static void blend_begin(struct quad_stage *qs)
1055 {
1056 qs->run = choose_blend_quad;
1057 }
1058
1059
/** Stage "destroy" hook: free the quad stage object. */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1064
1065
1066 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1067 {
1068 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1069
1070 stage->softpipe = softpipe;
1071 stage->begin = blend_begin;
1072 stage->run = choose_blend_quad;
1073 stage->destroy = blend_destroy;
1074
1075 return stage;
1076 }