r300g: only check for an empty shader if there are no compile errors
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * quad blending
30 * \author Brian Paul
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/u_format.h"
37 #include "sp_context.h"
38 #include "sp_quad.h"
39 #include "sp_tile_cache.h"
40 #include "sp_quad_pipe.h"
41
42
/*
 * Element-wise helpers operating on 4-element vectors.
 *
 * Every macro argument is parenthesized so that pointer expressions such
 * as "buf + 4" expand correctly.  Arguments are evaluated more than once,
 * so they must not have side effects.
 */

/** DST[i] = SRC[i] */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while(0)

/** DST[i] = SRC, where SRC is a scalar expression */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while(0)

/** R[i] = A[i] + B[i] */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R[i] = A[i] - B[i] */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1]; if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2]; if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3]; if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1]; if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2]; if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3]; if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R[i] = A[i] * B[i] */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R[i] = smaller of A[i] and B[i] */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R[i] = larger of A[i] and B[i] */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
116
117
118
119 static void
120 logicop_quad(struct quad_stage *qs,
121 float (*quadColor)[4],
122 float (*dest)[4])
123 {
124 struct softpipe_context *softpipe = qs->softpipe;
125 ubyte src[4][4], dst[4][4], res[4][4];
126 uint *src4 = (uint *) src;
127 uint *dst4 = (uint *) dst;
128 uint *res4 = (uint *) res;
129 uint j;
130
131
132 /* convert to ubyte */
133 for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
134 dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
135 dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
136 dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
137 dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
138
139 src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
140 src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
141 src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
142 src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
143 }
144
145 switch (softpipe->blend->logicop_func) {
146 case PIPE_LOGICOP_CLEAR:
147 for (j = 0; j < 4; j++)
148 res4[j] = 0;
149 break;
150 case PIPE_LOGICOP_NOR:
151 for (j = 0; j < 4; j++)
152 res4[j] = ~(src4[j] | dst4[j]);
153 break;
154 case PIPE_LOGICOP_AND_INVERTED:
155 for (j = 0; j < 4; j++)
156 res4[j] = ~src4[j] & dst4[j];
157 break;
158 case PIPE_LOGICOP_COPY_INVERTED:
159 for (j = 0; j < 4; j++)
160 res4[j] = ~src4[j];
161 break;
162 case PIPE_LOGICOP_AND_REVERSE:
163 for (j = 0; j < 4; j++)
164 res4[j] = src4[j] & ~dst4[j];
165 break;
166 case PIPE_LOGICOP_INVERT:
167 for (j = 0; j < 4; j++)
168 res4[j] = ~dst4[j];
169 break;
170 case PIPE_LOGICOP_XOR:
171 for (j = 0; j < 4; j++)
172 res4[j] = dst4[j] ^ src4[j];
173 break;
174 case PIPE_LOGICOP_NAND:
175 for (j = 0; j < 4; j++)
176 res4[j] = ~(src4[j] & dst4[j]);
177 break;
178 case PIPE_LOGICOP_AND:
179 for (j = 0; j < 4; j++)
180 res4[j] = src4[j] & dst4[j];
181 break;
182 case PIPE_LOGICOP_EQUIV:
183 for (j = 0; j < 4; j++)
184 res4[j] = ~(src4[j] ^ dst4[j]);
185 break;
186 case PIPE_LOGICOP_NOOP:
187 for (j = 0; j < 4; j++)
188 res4[j] = dst4[j];
189 break;
190 case PIPE_LOGICOP_OR_INVERTED:
191 for (j = 0; j < 4; j++)
192 res4[j] = ~src4[j] | dst4[j];
193 break;
194 case PIPE_LOGICOP_COPY:
195 for (j = 0; j < 4; j++)
196 res4[j] = src4[j];
197 break;
198 case PIPE_LOGICOP_OR_REVERSE:
199 for (j = 0; j < 4; j++)
200 res4[j] = src4[j] | ~dst4[j];
201 break;
202 case PIPE_LOGICOP_OR:
203 for (j = 0; j < 4; j++)
204 res4[j] = src4[j] | dst4[j];
205 break;
206 case PIPE_LOGICOP_SET:
207 for (j = 0; j < 4; j++)
208 res4[j] = ~0;
209 break;
210 default:
211 assert(0 && "invalid logicop mode");
212 }
213
214 for (j = 0; j < 4; j++) {
215 quadColor[j][0] = ubyte_to_float(res[j][0]);
216 quadColor[j][1] = ubyte_to_float(res[j][1]);
217 quadColor[j][2] = ubyte_to_float(res[j][2]);
218 quadColor[j][3] = ubyte_to_float(res[j][3]);
219 }
220 }
221
222
223
224 /**
225 * Do blending for a 2x2 quad for one color buffer.
226 * \param quadColor the incoming quad colors
227 * \param dest the destination/framebuffer quad colors
228 * \param blend_index which set of blending terms to use
229 * \param has_dst_alpha does the dest color buffer have an alpha channel?
230 */
231 static void
232 blend_quad(struct quad_stage *qs,
233 float (*quadColor)[4],
234 float (*dest)[4],
235 unsigned blend_index,
236 boolean has_dst_alpha)
237 {
238 static const float zero[4] = { 0, 0, 0, 0 };
239 static const float one[4] = { 1, 1, 1, 1 };
240 struct softpipe_context *softpipe = qs->softpipe;
241 float source[4][QUAD_SIZE] = { { 0 } };
242
243 /*
244 * Compute src/first term RGB
245 */
246 switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
247 case PIPE_BLENDFACTOR_ONE:
248 VEC4_COPY(source[0], quadColor[0]); /* R */
249 VEC4_COPY(source[1], quadColor[1]); /* G */
250 VEC4_COPY(source[2], quadColor[2]); /* B */
251 break;
252 case PIPE_BLENDFACTOR_SRC_COLOR:
253 VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
254 VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
255 VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
256 break;
257 case PIPE_BLENDFACTOR_SRC_ALPHA:
258 {
259 const float *alpha = quadColor[3];
260 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
261 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
262 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
263 }
264 break;
265 case PIPE_BLENDFACTOR_DST_COLOR:
266 VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
267 VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
268 VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
269 break;
270 case PIPE_BLENDFACTOR_DST_ALPHA:
271 if (has_dst_alpha) {
272 const float *alpha = dest[3];
273 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
274 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
275 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
276 }
277 else {
278 VEC4_COPY(source[0], quadColor[0]); /* R */
279 VEC4_COPY(source[1], quadColor[1]); /* G */
280 VEC4_COPY(source[2], quadColor[2]); /* B */
281 }
282 break;
283 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
284 if (has_dst_alpha) {
285 const float *alpha = quadColor[3];
286 float diff[4], temp[4];
287 VEC4_SUB(diff, one, dest[3]);
288 VEC4_MIN(temp, alpha, diff);
289 VEC4_MUL(source[0], quadColor[0], temp); /* R */
290 VEC4_MUL(source[1], quadColor[1], temp); /* G */
291 VEC4_MUL(source[2], quadColor[2], temp); /* B */
292 }
293 else {
294 VEC4_COPY(source[0], zero); /* R */
295 VEC4_COPY(source[1], zero); /* G */
296 VEC4_COPY(source[2], zero); /* B */
297 }
298 break;
299 case PIPE_BLENDFACTOR_CONST_COLOR:
300 {
301 float comp[4];
302 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
303 VEC4_MUL(source[0], quadColor[0], comp); /* R */
304 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
305 VEC4_MUL(source[1], quadColor[1], comp); /* G */
306 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
307 VEC4_MUL(source[2], quadColor[2], comp); /* B */
308 }
309 break;
310 case PIPE_BLENDFACTOR_CONST_ALPHA:
311 {
312 float alpha[4];
313 VEC4_SCALAR(alpha, softpipe->blend_color.color[3]);
314 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
315 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
316 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
317 }
318 break;
319 case PIPE_BLENDFACTOR_SRC1_COLOR:
320 assert(0); /* to do */
321 break;
322 case PIPE_BLENDFACTOR_SRC1_ALPHA:
323 assert(0); /* to do */
324 break;
325 case PIPE_BLENDFACTOR_ZERO:
326 VEC4_COPY(source[0], zero); /* R */
327 VEC4_COPY(source[1], zero); /* G */
328 VEC4_COPY(source[2], zero); /* B */
329 break;
330 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
331 {
332 float inv_comp[4];
333 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
334 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
335 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
336 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
337 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
338 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
339 }
340 break;
341 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
342 {
343 float inv_alpha[4];
344 VEC4_SUB(inv_alpha, one, quadColor[3]);
345 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
346 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
347 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
348 }
349 break;
350 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
351 if (has_dst_alpha) {
352 float inv_alpha[4];
353 VEC4_SUB(inv_alpha, one, dest[3]);
354 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
355 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
356 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
357 }
358 else {
359 VEC4_COPY(source[0], zero); /* R */
360 VEC4_COPY(source[1], zero); /* G */
361 VEC4_COPY(source[2], zero); /* B */
362 }
363 break;
364 case PIPE_BLENDFACTOR_INV_DST_COLOR:
365 {
366 float inv_comp[4];
367 VEC4_SUB(inv_comp, one, dest[0]); /* R */
368 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
369 VEC4_SUB(inv_comp, one, dest[1]); /* G */
370 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
371 VEC4_SUB(inv_comp, one, dest[2]); /* B */
372 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
373 }
374 break;
375 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
376 {
377 float inv_comp[4];
378 /* R */
379 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
380 VEC4_MUL(source[0], quadColor[0], inv_comp);
381 /* G */
382 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
383 VEC4_MUL(source[1], quadColor[1], inv_comp);
384 /* B */
385 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
386 VEC4_MUL(source[2], quadColor[2], inv_comp);
387 }
388 break;
389 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
390 {
391 float inv_alpha[4];
392 VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]);
393 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
394 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
395 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
396 }
397 break;
398 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
399 assert(0); /* to do */
400 break;
401 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
402 assert(0); /* to do */
403 break;
404 default:
405 assert(0 && "invalid rgb src factor");
406 }
407
408 /*
409 * Compute src/first term A
410 */
411 switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
412 case PIPE_BLENDFACTOR_ONE:
413 VEC4_COPY(source[3], quadColor[3]); /* A */
414 break;
415 case PIPE_BLENDFACTOR_SRC_COLOR:
416 /* fall-through */
417 case PIPE_BLENDFACTOR_SRC_ALPHA:
418 {
419 const float *alpha = quadColor[3];
420 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
421 }
422 break;
423 case PIPE_BLENDFACTOR_DST_COLOR:
424 /* fall-through */
425 case PIPE_BLENDFACTOR_DST_ALPHA:
426 if (has_dst_alpha)
427 VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
428 else
429 VEC4_COPY(source[3], quadColor[3]); /* A */
430 break;
431 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
432 /* multiply alpha by 1.0 */
433 VEC4_COPY(source[3], quadColor[3]); /* A */
434 break;
435 case PIPE_BLENDFACTOR_CONST_COLOR:
436 /* fall-through */
437 case PIPE_BLENDFACTOR_CONST_ALPHA:
438 {
439 float comp[4];
440 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
441 VEC4_MUL(source[3], quadColor[3], comp); /* A */
442 }
443 break;
444 case PIPE_BLENDFACTOR_ZERO:
445 VEC4_COPY(source[3], zero); /* A */
446 break;
447 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
448 /* fall-through */
449 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
450 {
451 float inv_alpha[4];
452 VEC4_SUB(inv_alpha, one, quadColor[3]);
453 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
454 }
455 break;
456 case PIPE_BLENDFACTOR_INV_DST_COLOR:
457 /* fall-through */
458 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
459 if (has_dst_alpha) {
460 float inv_alpha[4];
461 VEC4_SUB(inv_alpha, one, dest[3]);
462 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
463 }
464 else {
465 VEC4_COPY(source[3], zero); /* A */
466 }
467 break;
468 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
469 /* fall-through */
470 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
471 {
472 float inv_comp[4];
473 /* A */
474 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
475 VEC4_MUL(source[3], quadColor[3], inv_comp);
476 }
477 break;
478 default:
479 assert(0 && "invalid alpha src factor");
480 }
481
482
483 /*
484 * Compute dest/second term RGB
485 */
486 switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
487 case PIPE_BLENDFACTOR_ONE:
488 /* dest = dest * 1 NO-OP, leave dest as-is */
489 break;
490 case PIPE_BLENDFACTOR_SRC_COLOR:
491 VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
492 VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
493 VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
494 break;
495 case PIPE_BLENDFACTOR_SRC_ALPHA:
496 VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
497 VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
498 VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
499 break;
500 case PIPE_BLENDFACTOR_DST_ALPHA:
501 if (has_dst_alpha) {
502 VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
503 VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
504 VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
505 }
506 else {
507 /* dest = dest * 1 NO-OP, leave dest as-is */
508 }
509 break;
510 case PIPE_BLENDFACTOR_DST_COLOR:
511 VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
512 VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
513 VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
514 break;
515 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
516 if (has_dst_alpha) {
517 const float *alpha = quadColor[3];
518 float diff[4], temp[4];
519 VEC4_SUB(diff, one, dest[3]);
520 VEC4_MIN(temp, alpha, diff);
521 VEC4_MUL(dest[0], quadColor[0], temp); /* R */
522 VEC4_MUL(dest[1], quadColor[1], temp); /* G */
523 VEC4_MUL(dest[2], quadColor[2], temp); /* B */
524 }
525 else {
526 VEC4_COPY(dest[0], zero); /* R */
527 VEC4_COPY(dest[1], zero); /* G */
528 VEC4_COPY(dest[2], zero); /* B */
529 }
530 break;
531 case PIPE_BLENDFACTOR_CONST_COLOR:
532 {
533 float comp[4];
534 VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
535 VEC4_MUL(dest[0], dest[0], comp); /* R */
536 VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
537 VEC4_MUL(dest[1], dest[1], comp); /* G */
538 VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
539 VEC4_MUL(dest[2], dest[2], comp); /* B */
540 }
541 break;
542 case PIPE_BLENDFACTOR_CONST_ALPHA:
543 {
544 float comp[4];
545 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
546 VEC4_MUL(dest[0], dest[0], comp); /* R */
547 VEC4_MUL(dest[1], dest[1], comp); /* G */
548 VEC4_MUL(dest[2], dest[2], comp); /* B */
549 }
550 break;
551 case PIPE_BLENDFACTOR_ZERO:
552 VEC4_COPY(dest[0], zero); /* R */
553 VEC4_COPY(dest[1], zero); /* G */
554 VEC4_COPY(dest[2], zero); /* B */
555 break;
556 case PIPE_BLENDFACTOR_SRC1_COLOR:
557 case PIPE_BLENDFACTOR_SRC1_ALPHA:
558 /* XXX what are these? */
559 assert(0);
560 break;
561 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
562 {
563 float inv_comp[4];
564 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
565 VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
566 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
567 VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
568 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
569 VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
570 }
571 break;
572 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
573 {
574 float one_minus_alpha[QUAD_SIZE];
575 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
576 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
577 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
578 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
579 }
580 break;
581 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
582 if (has_dst_alpha) {
583 float inv_comp[4];
584 VEC4_SUB(inv_comp, one, dest[3]); /* A */
585 VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
586 VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
587 VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
588 }
589 else {
590 VEC4_COPY(dest[0], zero); /* R */
591 VEC4_COPY(dest[1], zero); /* G */
592 VEC4_COPY(dest[2], zero); /* B */
593 }
594 break;
595 case PIPE_BLENDFACTOR_INV_DST_COLOR:
596 {
597 float inv_comp[4];
598 VEC4_SUB(inv_comp, one, dest[0]); /* R */
599 VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
600 VEC4_SUB(inv_comp, one, dest[1]); /* G */
601 VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
602 VEC4_SUB(inv_comp, one, dest[2]); /* B */
603 VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
604 }
605 break;
606 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
607 {
608 float inv_comp[4];
609 /* R */
610 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
611 VEC4_MUL(dest[0], dest[0], inv_comp);
612 /* G */
613 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
614 VEC4_MUL(dest[1], dest[1], inv_comp);
615 /* B */
616 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
617 VEC4_MUL(dest[2], dest[2], inv_comp);
618 }
619 break;
620 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
621 {
622 float inv_comp[4];
623 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
624 VEC4_MUL(dest[0], dest[0], inv_comp);
625 VEC4_MUL(dest[1], dest[1], inv_comp);
626 VEC4_MUL(dest[2], dest[2], inv_comp);
627 }
628 break;
629 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
630 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
631 /* XXX what are these? */
632 assert(0);
633 break;
634 default:
635 assert(0 && "invalid rgb dst factor");
636 }
637
638 /*
639 * Compute dest/second term A
640 */
641 switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
642 case PIPE_BLENDFACTOR_ONE:
643 /* dest = dest * 1 NO-OP, leave dest as-is */
644 break;
645 case PIPE_BLENDFACTOR_SRC_COLOR:
646 /* fall-through */
647 case PIPE_BLENDFACTOR_SRC_ALPHA:
648 VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
649 break;
650 case PIPE_BLENDFACTOR_DST_COLOR:
651 /* fall-through */
652 case PIPE_BLENDFACTOR_DST_ALPHA:
653 if (has_dst_alpha) {
654 VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
655 }
656 else {
657 /* dest = dest * 1 NO-OP, leave dest as-is */
658 }
659 break;
660 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
661 /* dest = dest * 1 NO-OP, leave dest as-is */
662 break;
663 case PIPE_BLENDFACTOR_CONST_COLOR:
664 /* fall-through */
665 case PIPE_BLENDFACTOR_CONST_ALPHA:
666 {
667 float comp[4];
668 VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
669 VEC4_MUL(dest[3], dest[3], comp); /* A */
670 }
671 break;
672 case PIPE_BLENDFACTOR_ZERO:
673 VEC4_COPY(dest[3], zero); /* A */
674 break;
675 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
676 /* fall-through */
677 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
678 {
679 float one_minus_alpha[QUAD_SIZE];
680 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
681 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
682 }
683 break;
684 case PIPE_BLENDFACTOR_INV_DST_COLOR:
685 /* fall-through */
686 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
687 if (has_dst_alpha) {
688 float inv_comp[4];
689 VEC4_SUB(inv_comp, one, dest[3]); /* A */
690 VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
691 }
692 else {
693 VEC4_COPY(dest[3], zero); /* A */
694 }
695 break;
696 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
697 /* fall-through */
698 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
699 {
700 float inv_comp[4];
701 VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
702 VEC4_MUL(dest[3], dest[3], inv_comp);
703 }
704 break;
705 default:
706 assert(0 && "invalid alpha dst factor");
707 }
708
709 /*
710 * Combine RGB terms
711 */
712 switch (softpipe->blend->rt[blend_index].rgb_func) {
713 case PIPE_BLEND_ADD:
714 VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
715 VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
716 VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
717 break;
718 case PIPE_BLEND_SUBTRACT:
719 VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
720 VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
721 VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
722 break;
723 case PIPE_BLEND_REVERSE_SUBTRACT:
724 VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
725 VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
726 VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
727 break;
728 case PIPE_BLEND_MIN:
729 VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
730 VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
731 VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
732 break;
733 case PIPE_BLEND_MAX:
734 VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
735 VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
736 VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
737 break;
738 default:
739 assert(0 && "invalid rgb blend func");
740 }
741
742 /*
743 * Combine A terms
744 */
745 switch (softpipe->blend->rt[blend_index].alpha_func) {
746 case PIPE_BLEND_ADD:
747 VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
748 break;
749 case PIPE_BLEND_SUBTRACT:
750 VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
751 break;
752 case PIPE_BLEND_REVERSE_SUBTRACT:
753 VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
754 break;
755 case PIPE_BLEND_MIN:
756 VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
757 break;
758 case PIPE_BLEND_MAX:
759 VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
760 break;
761 default:
762 assert(0 && "invalid alpha blend func");
763 }
764 }
765
766 static void
767 colormask_quad(unsigned colormask,
768 float (*quadColor)[4],
769 float (*dest)[4])
770 {
771 /* R */
772 if (!(colormask & PIPE_MASK_R))
773 COPY_4V(quadColor[0], dest[0]);
774
775 /* G */
776 if (!(colormask & PIPE_MASK_G))
777 COPY_4V(quadColor[1], dest[1]);
778
779 /* B */
780 if (!(colormask & PIPE_MASK_B))
781 COPY_4V(quadColor[2], dest[2]);
782
783 /* A */
784 if (!(colormask & PIPE_MASK_A))
785 COPY_4V(quadColor[3], dest[3]);
786 }
787
788
789 static void
790 blend_fallback(struct quad_stage *qs,
791 struct quad_header *quads[],
792 unsigned nr)
793 {
794 struct softpipe_context *softpipe = qs->softpipe;
795 const struct pipe_blend_state *blend = softpipe->blend;
796 unsigned cbuf;
797
798 for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
799 {
800 /* which blend/mask state index to use: */
801 const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
802 float dest[4][QUAD_SIZE];
803 struct softpipe_cached_tile *tile
804 = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
805 quads[0]->input.x0,
806 quads[0]->input.y0);
807 boolean has_dst_alpha
808 = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
809 uint q, i, j;
810
811 for (q = 0; q < nr; q++) {
812 struct quad_header *quad = quads[q];
813 float (*quadColor)[4] = quad->output.color[cbuf];
814 const int itx = (quad->input.x0 & (TILE_SIZE-1));
815 const int ity = (quad->input.y0 & (TILE_SIZE-1));
816
817 /* get/swizzle dest colors
818 */
819 for (j = 0; j < QUAD_SIZE; j++) {
820 int x = itx + (j & 1);
821 int y = ity + (j >> 1);
822 for (i = 0; i < 4; i++) {
823 dest[i][j] = tile->data.color[y][x][i];
824 }
825 }
826
827
828 if (blend->logicop_enable) {
829 logicop_quad( qs, quadColor, dest );
830 }
831 else if (blend->rt[blend_buf].blend_enable) {
832 blend_quad( qs, quadColor, dest, blend_buf, has_dst_alpha );
833 }
834
835 if (blend->rt[blend_buf].colormask != 0xf)
836 colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
837
838 /* Output color values
839 */
840 for (j = 0; j < QUAD_SIZE; j++) {
841 if (quad->inout.mask & (1 << j)) {
842 int x = itx + (j & 1);
843 int y = ity + (j >> 1);
844 for (i = 0; i < 4; i++) { /* loop over color chans */
845 tile->data.color[y][x][i] = quadColor[i][j];
846 }
847 }
848 }
849 }
850 }
851 }
852
853
854 static void
855 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
856 struct quad_header *quads[],
857 unsigned nr)
858 {
859 static const float one[4] = { 1, 1, 1, 1 };
860 float one_minus_alpha[QUAD_SIZE];
861 float dest[4][QUAD_SIZE];
862 float source[4][QUAD_SIZE];
863 uint i, j, q;
864
865 struct softpipe_cached_tile *tile
866 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
867 quads[0]->input.x0,
868 quads[0]->input.y0);
869
870 for (q = 0; q < nr; q++) {
871 struct quad_header *quad = quads[q];
872 float (*quadColor)[4] = quad->output.color[0];
873 const float *alpha = quadColor[3];
874 const int itx = (quad->input.x0 & (TILE_SIZE-1));
875 const int ity = (quad->input.y0 & (TILE_SIZE-1));
876
877 /* get/swizzle dest colors */
878 for (j = 0; j < QUAD_SIZE; j++) {
879 int x = itx + (j & 1);
880 int y = ity + (j >> 1);
881 for (i = 0; i < 4; i++) {
882 dest[i][j] = tile->data.color[y][x][i];
883 }
884 }
885
886 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
887 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
888 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
889 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
890
891 VEC4_SUB(one_minus_alpha, one, alpha);
892 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
893 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
894 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
895 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* B */
896
897 VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
898 VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
899 VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
900 VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
901
902 for (j = 0; j < QUAD_SIZE; j++) {
903 if (quad->inout.mask & (1 << j)) {
904 int x = itx + (j & 1);
905 int y = ity + (j >> 1);
906 for (i = 0; i < 4; i++) { /* loop over color chans */
907 tile->data.color[y][x][i] = quadColor[i][j];
908 }
909 }
910 }
911 }
912 }
913
914 static void
915 blend_single_add_one_one(struct quad_stage *qs,
916 struct quad_header *quads[],
917 unsigned nr)
918 {
919 float dest[4][QUAD_SIZE];
920 uint i, j, q;
921
922 struct softpipe_cached_tile *tile
923 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
924 quads[0]->input.x0,
925 quads[0]->input.y0);
926
927 for (q = 0; q < nr; q++) {
928 struct quad_header *quad = quads[q];
929 float (*quadColor)[4] = quad->output.color[0];
930 const int itx = (quad->input.x0 & (TILE_SIZE-1));
931 const int ity = (quad->input.y0 & (TILE_SIZE-1));
932
933 /* get/swizzle dest colors */
934 for (j = 0; j < QUAD_SIZE; j++) {
935 int x = itx + (j & 1);
936 int y = ity + (j >> 1);
937 for (i = 0; i < 4; i++) {
938 dest[i][j] = tile->data.color[y][x][i];
939 }
940 }
941
942 VEC4_ADD_SAT(quadColor[0], quadColor[0], dest[0]); /* R */
943 VEC4_ADD_SAT(quadColor[1], quadColor[1], dest[1]); /* G */
944 VEC4_ADD_SAT(quadColor[2], quadColor[2], dest[2]); /* B */
945 VEC4_ADD_SAT(quadColor[3], quadColor[3], dest[3]); /* A */
946
947 for (j = 0; j < QUAD_SIZE; j++) {
948 if (quad->inout.mask & (1 << j)) {
949 int x = itx + (j & 1);
950 int y = ity + (j >> 1);
951 for (i = 0; i < 4; i++) { /* loop over color chans */
952 tile->data.color[y][x][i] = quadColor[i][j];
953 }
954 }
955 }
956 }
957 }
958
959
960 static void
961 single_output_color(struct quad_stage *qs,
962 struct quad_header *quads[],
963 unsigned nr)
964 {
965 uint i, j, q;
966
967 struct softpipe_cached_tile *tile
968 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
969 quads[0]->input.x0,
970 quads[0]->input.y0);
971
972 for (q = 0; q < nr; q++) {
973 struct quad_header *quad = quads[q];
974 float (*quadColor)[4] = quad->output.color[0];
975 const int itx = (quad->input.x0 & (TILE_SIZE-1));
976 const int ity = (quad->input.y0 & (TILE_SIZE-1));
977
978 for (j = 0; j < QUAD_SIZE; j++) {
979 if (quad->inout.mask & (1 << j)) {
980 int x = itx + (j & 1);
981 int y = ity + (j >> 1);
982 for (i = 0; i < 4; i++) { /* loop over color chans */
983 tile->data.color[y][x][i] = quadColor[i][j];
984 }
985 }
986 }
987 }
988 }
989
/**
 * Do-nothing blend path; chosen when no color buffers are bound.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty: there is nothing to write */
   (void) qs;
   (void) quads;
   (void) nr;
}
996
997
998 static void
999 choose_blend_quad(struct quad_stage *qs,
1000 struct quad_header *quads[],
1001 unsigned nr)
1002 {
1003 struct softpipe_context *softpipe = qs->softpipe;
1004 const struct pipe_blend_state *blend = softpipe->blend;
1005
1006 qs->run = blend_fallback;
1007
1008 if (softpipe->framebuffer.nr_cbufs == 0) {
1009 qs->run = blend_noop;
1010 }
1011 else if (!softpipe->blend->logicop_enable &&
1012 softpipe->blend->rt[0].colormask == 0xf &&
1013 softpipe->framebuffer.nr_cbufs == 1)
1014 {
1015 if (!blend->rt[0].blend_enable) {
1016 qs->run = single_output_color;
1017 }
1018 else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1019 blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1020 blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1021 {
1022 if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1023 if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1024 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1025 qs->run = blend_single_add_one_one;
1026 }
1027 else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1028 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1029 qs->run = blend_single_add_src_alpha_inv_src_alpha;
1030
1031 }
1032 }
1033 }
1034
1035 qs->run(qs, quads, nr);
1036 }
1037
1038
1039 static void blend_begin(struct quad_stage *qs)
1040 {
1041 qs->run = choose_blend_quad;
1042 }
1043
1044
/**
 * Stage 'destroy' hook: release the quad stage object.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1049
1050
1051 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1052 {
1053 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1054
1055 stage->softpipe = softpipe;
1056 stage->begin = blend_begin;
1057 stage->run = choose_blend_quad;
1058 stage->destroy = blend_destroy;
1059
1060 return stage;
1061 }