s/Tungsten Graphics/VMware/
[mesa.git] / src / gallium / drivers / softpipe / sp_quad_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * quad blending
30 * \author Brian Paul
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/u_math.h"
35 #include "util/u_memory.h"
36 #include "util/u_format.h"
37 #include "util/u_dual_blend.h"
38 #include "sp_context.h"
39 #include "sp_state.h"
40 #include "sp_quad.h"
41 #include "sp_tile_cache.h"
42 #include "sp_quad_pipe.h"
43
44
/**
 * Logical base format of a color buffer.
 * rebase_colors() uses this to decide which channels must be replicated
 * or forced to 1.0 before the colors are stored.
 */
enum format
{
   RGBA            = 0,
   RGB             = 1,
   LUMINANCE       = 2,
   LUMINANCE_ALPHA = 3,
   INTENSITY       = 4
};
53
54
/**
 * Subclass of quad_stage.
 * 'base' must stay the first member: the blend_quad_stage() cast wrapper
 * converts a quad_stage pointer directly into a pointer to this struct.
 * The arrays are indexed by color buffer number.
 */
struct blend_quad_stage
{
   struct quad_stage base;
   boolean clamp[PIPE_MAX_COLOR_BUFS];    /**< clamp colors to [0,1]? */
   /** base format per color buffer, handed to rebase_colors() */
   enum format base_format[PIPE_MAX_COLOR_BUFS];
   /** format type per color buffer; logicop is skipped for FLOAT buffers */
   enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
};
63
64
65 /** cast wrapper */
66 static INLINE struct blend_quad_stage *
67 blend_quad_stage(struct quad_stage *stage)
68 {
69 return (struct blend_quad_stage *) stage;
70 }
71
72
/*
 * Helper macros operating on 4-element vectors (one value per pixel of a
 * 2x2 quad).  Every argument is parenthesized so that any expression which
 * yields an array or pointer (e.g. "*p" or "cond ? a : b") can be passed
 * safely.  Note that arguments are evaluated more than once, so they must
 * not have side effects.
 */

/** DST[i] = SRC[i] */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while(0)

/** DST[i] = SRC, where SRC is a scalar */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while(0)

/** R[i] = A[i] + B[i] */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R[i] = A[i] - B[i] */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1]; if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2]; if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3]; if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1]; if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2]; if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3]; if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R[i] = A[i] * B[i] */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R[i] = min(A[i], B[i]); B[i] is chosen when the values compare equal */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R[i] = max(A[i], B[i]); B[i] is chosen when the values compare equal */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
146
147
148
149 static void
150 logicop_quad(struct quad_stage *qs,
151 float (*quadColor)[4],
152 float (*dest)[4])
153 {
154 struct softpipe_context *softpipe = qs->softpipe;
155 ubyte src[4][4], dst[4][4], res[4][4];
156 uint *src4 = (uint *) src;
157 uint *dst4 = (uint *) dst;
158 uint *res4 = (uint *) res;
159 uint j;
160
161
162 /* convert to ubyte */
163 for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
164 dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
165 dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
166 dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
167 dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
168
169 src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
170 src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
171 src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
172 src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
173 }
174
175 switch (softpipe->blend->logicop_func) {
176 case PIPE_LOGICOP_CLEAR:
177 for (j = 0; j < 4; j++)
178 res4[j] = 0;
179 break;
180 case PIPE_LOGICOP_NOR:
181 for (j = 0; j < 4; j++)
182 res4[j] = ~(src4[j] | dst4[j]);
183 break;
184 case PIPE_LOGICOP_AND_INVERTED:
185 for (j = 0; j < 4; j++)
186 res4[j] = ~src4[j] & dst4[j];
187 break;
188 case PIPE_LOGICOP_COPY_INVERTED:
189 for (j = 0; j < 4; j++)
190 res4[j] = ~src4[j];
191 break;
192 case PIPE_LOGICOP_AND_REVERSE:
193 for (j = 0; j < 4; j++)
194 res4[j] = src4[j] & ~dst4[j];
195 break;
196 case PIPE_LOGICOP_INVERT:
197 for (j = 0; j < 4; j++)
198 res4[j] = ~dst4[j];
199 break;
200 case PIPE_LOGICOP_XOR:
201 for (j = 0; j < 4; j++)
202 res4[j] = dst4[j] ^ src4[j];
203 break;
204 case PIPE_LOGICOP_NAND:
205 for (j = 0; j < 4; j++)
206 res4[j] = ~(src4[j] & dst4[j]);
207 break;
208 case PIPE_LOGICOP_AND:
209 for (j = 0; j < 4; j++)
210 res4[j] = src4[j] & dst4[j];
211 break;
212 case PIPE_LOGICOP_EQUIV:
213 for (j = 0; j < 4; j++)
214 res4[j] = ~(src4[j] ^ dst4[j]);
215 break;
216 case PIPE_LOGICOP_NOOP:
217 for (j = 0; j < 4; j++)
218 res4[j] = dst4[j];
219 break;
220 case PIPE_LOGICOP_OR_INVERTED:
221 for (j = 0; j < 4; j++)
222 res4[j] = ~src4[j] | dst4[j];
223 break;
224 case PIPE_LOGICOP_COPY:
225 for (j = 0; j < 4; j++)
226 res4[j] = src4[j];
227 break;
228 case PIPE_LOGICOP_OR_REVERSE:
229 for (j = 0; j < 4; j++)
230 res4[j] = src4[j] | ~dst4[j];
231 break;
232 case PIPE_LOGICOP_OR:
233 for (j = 0; j < 4; j++)
234 res4[j] = src4[j] | dst4[j];
235 break;
236 case PIPE_LOGICOP_SET:
237 for (j = 0; j < 4; j++)
238 res4[j] = ~0;
239 break;
240 default:
241 assert(0 && "invalid logicop mode");
242 }
243
244 for (j = 0; j < 4; j++) {
245 quadColor[j][0] = ubyte_to_float(res[j][0]);
246 quadColor[j][1] = ubyte_to_float(res[j][1]);
247 quadColor[j][2] = ubyte_to_float(res[j][2]);
248 quadColor[j][3] = ubyte_to_float(res[j][3]);
249 }
250 }
251
252
253
254 /**
255 * Do blending for a 2x2 quad for one color buffer.
256 * \param quadColor the incoming quad colors
257 * \param dest the destination/framebuffer quad colors
258 * \param const_blend_color the constant blend color
259 * \param blend_index which set of blending terms to use
260 */
261 static void
262 blend_quad(struct quad_stage *qs,
263 float (*quadColor)[4],
264 float (*quadColor2)[4],
265 float (*dest)[4],
266 const float const_blend_color[4],
267 unsigned blend_index)
268 {
269 static const float zero[4] = { 0, 0, 0, 0 };
270 static const float one[4] = { 1, 1, 1, 1 };
271 struct softpipe_context *softpipe = qs->softpipe;
272 float source[4][TGSI_QUAD_SIZE] = { { 0 } };
273 float blend_dest[4][TGSI_QUAD_SIZE];
274
275 /*
276 * Compute src/first term RGB
277 */
278 switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
279 case PIPE_BLENDFACTOR_ONE:
280 VEC4_COPY(source[0], quadColor[0]); /* R */
281 VEC4_COPY(source[1], quadColor[1]); /* G */
282 VEC4_COPY(source[2], quadColor[2]); /* B */
283 break;
284 case PIPE_BLENDFACTOR_SRC_COLOR:
285 VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
286 VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
287 VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
288 break;
289 case PIPE_BLENDFACTOR_SRC_ALPHA:
290 {
291 const float *alpha = quadColor[3];
292 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
293 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
294 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
295 }
296 break;
297 case PIPE_BLENDFACTOR_DST_COLOR:
298 VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
299 VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
300 VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
301 break;
302 case PIPE_BLENDFACTOR_DST_ALPHA:
303 {
304 const float *alpha = dest[3];
305 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
306 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
307 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
308 }
309 break;
310 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
311 {
312 const float *alpha = quadColor[3];
313 float diff[4], temp[4];
314 VEC4_SUB(diff, one, dest[3]);
315 VEC4_MIN(temp, alpha, diff);
316 VEC4_MUL(source[0], quadColor[0], temp); /* R */
317 VEC4_MUL(source[1], quadColor[1], temp); /* G */
318 VEC4_MUL(source[2], quadColor[2], temp); /* B */
319 }
320 break;
321 case PIPE_BLENDFACTOR_CONST_COLOR:
322 {
323 float comp[4];
324 VEC4_SCALAR(comp, const_blend_color[0]); /* R */
325 VEC4_MUL(source[0], quadColor[0], comp); /* R */
326 VEC4_SCALAR(comp, const_blend_color[1]); /* G */
327 VEC4_MUL(source[1], quadColor[1], comp); /* G */
328 VEC4_SCALAR(comp, const_blend_color[2]); /* B */
329 VEC4_MUL(source[2], quadColor[2], comp); /* B */
330 }
331 break;
332 case PIPE_BLENDFACTOR_CONST_ALPHA:
333 {
334 float alpha[4];
335 VEC4_SCALAR(alpha, const_blend_color[3]);
336 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
337 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
338 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
339 }
340 break;
341 case PIPE_BLENDFACTOR_SRC1_COLOR:
342 VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
343 VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
344 VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
345 break;
346 case PIPE_BLENDFACTOR_SRC1_ALPHA:
347 {
348 const float *alpha = quadColor2[3];
349 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
350 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
351 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
352 }
353 break;
354 case PIPE_BLENDFACTOR_ZERO:
355 VEC4_COPY(source[0], zero); /* R */
356 VEC4_COPY(source[1], zero); /* G */
357 VEC4_COPY(source[2], zero); /* B */
358 break;
359 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
360 {
361 float inv_comp[4];
362 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
363 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
364 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
365 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
366 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
367 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
368 }
369 break;
370 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
371 {
372 float inv_alpha[4];
373 VEC4_SUB(inv_alpha, one, quadColor[3]);
374 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
375 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
376 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
377 }
378 break;
379 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
380 {
381 float inv_alpha[4];
382 VEC4_SUB(inv_alpha, one, dest[3]);
383 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
384 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
385 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
386 }
387 break;
388 case PIPE_BLENDFACTOR_INV_DST_COLOR:
389 {
390 float inv_comp[4];
391 VEC4_SUB(inv_comp, one, dest[0]); /* R */
392 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
393 VEC4_SUB(inv_comp, one, dest[1]); /* G */
394 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
395 VEC4_SUB(inv_comp, one, dest[2]); /* B */
396 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
397 }
398 break;
399 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
400 {
401 float inv_comp[4];
402 /* R */
403 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
404 VEC4_MUL(source[0], quadColor[0], inv_comp);
405 /* G */
406 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
407 VEC4_MUL(source[1], quadColor[1], inv_comp);
408 /* B */
409 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
410 VEC4_MUL(source[2], quadColor[2], inv_comp);
411 }
412 break;
413 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
414 {
415 float inv_alpha[4];
416 VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
417 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
418 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
419 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
420 }
421 break;
422 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
423 {
424 float inv_comp[4];
425 VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
426 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
427 VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
428 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
429 VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
430 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
431 }
432 break;
433 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
434 {
435 float inv_alpha[4];
436 VEC4_SUB(inv_alpha, one, quadColor2[3]);
437 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
438 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
439 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
440 }
441 break;
442 default:
443 assert(0 && "invalid rgb src factor");
444 }
445
446 /*
447 * Compute src/first term A
448 */
449 switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
450 case PIPE_BLENDFACTOR_ONE:
451 VEC4_COPY(source[3], quadColor[3]); /* A */
452 break;
453 case PIPE_BLENDFACTOR_SRC_COLOR:
454 /* fall-through */
455 case PIPE_BLENDFACTOR_SRC_ALPHA:
456 {
457 const float *alpha = quadColor[3];
458 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
459 }
460 break;
461 case PIPE_BLENDFACTOR_DST_COLOR:
462 /* fall-through */
463 case PIPE_BLENDFACTOR_DST_ALPHA:
464 VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
465 break;
466 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
467 /* multiply alpha by 1.0 */
468 VEC4_COPY(source[3], quadColor[3]); /* A */
469 break;
470 case PIPE_BLENDFACTOR_CONST_COLOR:
471 /* fall-through */
472 case PIPE_BLENDFACTOR_CONST_ALPHA:
473 {
474 float comp[4];
475 VEC4_SCALAR(comp, const_blend_color[3]); /* A */
476 VEC4_MUL(source[3], quadColor[3], comp); /* A */
477 }
478 break;
479 case PIPE_BLENDFACTOR_ZERO:
480 VEC4_COPY(source[3], zero); /* A */
481 break;
482 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
483 /* fall-through */
484 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
485 {
486 float inv_alpha[4];
487 VEC4_SUB(inv_alpha, one, quadColor[3]);
488 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
489 }
490 break;
491 case PIPE_BLENDFACTOR_INV_DST_COLOR:
492 /* fall-through */
493 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
494 {
495 float inv_alpha[4];
496 VEC4_SUB(inv_alpha, one, dest[3]);
497 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
498 }
499 break;
500 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
501 /* fall-through */
502 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
503 {
504 float inv_comp[4];
505 /* A */
506 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
507 VEC4_MUL(source[3], quadColor[3], inv_comp);
508 }
509 break;
510 case PIPE_BLENDFACTOR_SRC1_COLOR:
511 /* fall-through */
512 case PIPE_BLENDFACTOR_SRC1_ALPHA:
513 {
514 const float *alpha = quadColor2[3];
515 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
516 }
517 break;
518 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
519 /* fall-through */
520 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
521 {
522 float inv_alpha[4];
523 VEC4_SUB(inv_alpha, one, quadColor2[3]);
524 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
525 }
526 break;
527 default:
528 assert(0 && "invalid alpha src factor");
529 }
530
531 /* Save the original dest for use in masking */
532 VEC4_COPY(blend_dest[0], dest[0]);
533 VEC4_COPY(blend_dest[1], dest[1]);
534 VEC4_COPY(blend_dest[2], dest[2]);
535 VEC4_COPY(blend_dest[3], dest[3]);
536
537
538 /*
539 * Compute blend_dest/second term RGB
540 */
541 switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
542 case PIPE_BLENDFACTOR_ONE:
543 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
544 break;
545 case PIPE_BLENDFACTOR_SRC_COLOR:
546 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
547 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
548 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
549 break;
550 case PIPE_BLENDFACTOR_SRC_ALPHA:
551 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
552 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
553 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
554 break;
555 case PIPE_BLENDFACTOR_DST_ALPHA:
556 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
557 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
558 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
559 break;
560 case PIPE_BLENDFACTOR_DST_COLOR:
561 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
562 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
563 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
564 break;
565 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
566 {
567 const float *alpha = quadColor[3];
568 float diff[4], temp[4];
569 VEC4_SUB(diff, one, blend_dest[3]);
570 VEC4_MIN(temp, alpha, diff);
571 VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
572 VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
573 VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
574 }
575 break;
576 case PIPE_BLENDFACTOR_CONST_COLOR:
577 {
578 float comp[4];
579 VEC4_SCALAR(comp, const_blend_color[0]); /* R */
580 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
581 VEC4_SCALAR(comp, const_blend_color[1]); /* G */
582 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
583 VEC4_SCALAR(comp, const_blend_color[2]); /* B */
584 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
585 }
586 break;
587 case PIPE_BLENDFACTOR_CONST_ALPHA:
588 {
589 float comp[4];
590 VEC4_SCALAR(comp, const_blend_color[3]); /* A */
591 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
592 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
593 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
594 }
595 break;
596 case PIPE_BLENDFACTOR_ZERO:
597 VEC4_COPY(blend_dest[0], zero); /* R */
598 VEC4_COPY(blend_dest[1], zero); /* G */
599 VEC4_COPY(blend_dest[2], zero); /* B */
600 break;
601 case PIPE_BLENDFACTOR_SRC1_COLOR:
602 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
603 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
604 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
605 break;
606 case PIPE_BLENDFACTOR_SRC1_ALPHA:
607 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
608 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
609 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
610 break;
611 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
612 {
613 float inv_comp[4];
614 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
615 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
616 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
617 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
618 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
619 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
620 }
621 break;
622 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
623 {
624 float one_minus_alpha[TGSI_QUAD_SIZE];
625 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
626 VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
627 VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
628 VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
629 }
630 break;
631 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
632 {
633 float inv_comp[4];
634 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
635 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
636 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
637 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
638 }
639 break;
640 case PIPE_BLENDFACTOR_INV_DST_COLOR:
641 {
642 float inv_comp[4];
643 VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
644 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
645 VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
646 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
647 VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
648 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
649 }
650 break;
651 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
652 {
653 float inv_comp[4];
654 /* R */
655 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
656 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
657 /* G */
658 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
659 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
660 /* B */
661 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
662 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
663 }
664 break;
665 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
666 {
667 float inv_comp[4];
668 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
669 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
670 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
671 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
672 }
673 break;
674 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
675 {
676 float inv_comp[4];
677 VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
678 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
679 VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
680 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
681 VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
682 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
683 }
684 break;
685 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
686 {
687 float one_minus_alpha[TGSI_QUAD_SIZE];
688 VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
689 VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
690 VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
691 VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
692 }
693 break;
694 default:
695 assert(0 && "invalid rgb dst factor");
696 }
697
698 /*
699 * Compute blend_dest/second term A
700 */
701 switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
702 case PIPE_BLENDFACTOR_ONE:
703 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
704 break;
705 case PIPE_BLENDFACTOR_SRC_COLOR:
706 /* fall-through */
707 case PIPE_BLENDFACTOR_SRC_ALPHA:
708 VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
709 break;
710 case PIPE_BLENDFACTOR_DST_COLOR:
711 /* fall-through */
712 case PIPE_BLENDFACTOR_DST_ALPHA:
713 VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
714 break;
715 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
716 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */
717 break;
718 case PIPE_BLENDFACTOR_CONST_COLOR:
719 /* fall-through */
720 case PIPE_BLENDFACTOR_CONST_ALPHA:
721 {
722 float comp[4];
723 VEC4_SCALAR(comp, const_blend_color[3]); /* A */
724 VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
725 }
726 break;
727 case PIPE_BLENDFACTOR_ZERO:
728 VEC4_COPY(blend_dest[3], zero); /* A */
729 break;
730 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
731 /* fall-through */
732 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
733 {
734 float one_minus_alpha[TGSI_QUAD_SIZE];
735 VEC4_SUB(one_minus_alpha, one, quadColor[3]);
736 VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
737 }
738 break;
739 case PIPE_BLENDFACTOR_INV_DST_COLOR:
740 /* fall-through */
741 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
742 {
743 float inv_comp[4];
744 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
745 VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
746 }
747 break;
748 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
749 /* fall-through */
750 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
751 {
752 float inv_comp[4];
753 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
754 VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
755 }
756 break;
757 case PIPE_BLENDFACTOR_SRC1_COLOR:
758 /* fall-through */
759 case PIPE_BLENDFACTOR_SRC1_ALPHA:
760 VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
761 break;
762 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
763 /* fall-through */
764 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
765 {
766 float one_minus_alpha[TGSI_QUAD_SIZE];
767 VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
768 VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
769 }
770 break;
771 default:
772 assert(0 && "invalid alpha dst factor");
773 }
774
775 /*
776 * Combine RGB terms
777 */
778 switch (softpipe->blend->rt[blend_index].rgb_func) {
779 case PIPE_BLEND_ADD:
780 VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
781 VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
782 VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
783 break;
784 case PIPE_BLEND_SUBTRACT:
785 VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
786 VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
787 VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
788 break;
789 case PIPE_BLEND_REVERSE_SUBTRACT:
790 VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
791 VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
792 VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
793 break;
794 case PIPE_BLEND_MIN:
795 VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
796 VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
797 VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
798 break;
799 case PIPE_BLEND_MAX:
800 VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
801 VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
802 VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
803 break;
804 default:
805 assert(0 && "invalid rgb blend func");
806 }
807
808 /*
809 * Combine A terms
810 */
811 switch (softpipe->blend->rt[blend_index].alpha_func) {
812 case PIPE_BLEND_ADD:
813 VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
814 break;
815 case PIPE_BLEND_SUBTRACT:
816 VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
817 break;
818 case PIPE_BLEND_REVERSE_SUBTRACT:
819 VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
820 break;
821 case PIPE_BLEND_MIN:
822 VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
823 break;
824 case PIPE_BLEND_MAX:
825 VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
826 break;
827 default:
828 assert(0 && "invalid alpha blend func");
829 }
830 }
831
832 static void
833 colormask_quad(unsigned colormask,
834 float (*quadColor)[4],
835 float (*dest)[4])
836 {
837 /* R */
838 if (!(colormask & PIPE_MASK_R))
839 COPY_4V(quadColor[0], dest[0]);
840
841 /* G */
842 if (!(colormask & PIPE_MASK_G))
843 COPY_4V(quadColor[1], dest[1]);
844
845 /* B */
846 if (!(colormask & PIPE_MASK_B))
847 COPY_4V(quadColor[2], dest[2]);
848
849 /* A */
850 if (!(colormask & PIPE_MASK_A))
851 COPY_4V(quadColor[3], dest[3]);
852 }
853
854
855 /**
856 * Clamp all colors in a quad to [0, 1]
857 */
858 static void
859 clamp_colors(float (*quadColor)[4])
860 {
861 unsigned i, j;
862
863 for (i = 0; i < 4; i++) {
864 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
865 quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
866 }
867 }
868 }
869
870
871 /**
872 * If we're drawing to a luminance, luminance/alpha or intensity surface
873 * we have to adjust (rebase) the fragment/quad colors before writing them
874 * to the tile cache. The tile cache always stores RGBA colors but if
875 * we're caching a L/A surface (for example) we need to be sure that R=G=B
876 * so that subsequent reads from the surface cache appear to return L/A
877 * values.
878 * The piglit fbo-blending-formats test will exercise this.
879 */
880 static void
881 rebase_colors(enum format base_format, float (*quadColor)[4])
882 {
883 unsigned i;
884
885 switch (base_format) {
886 case RGB:
887 for (i = 0; i < 4; i++) {
888 /* A = 1 */
889 quadColor[3][i] = 1.0F;
890 }
891 break;
892 case LUMINANCE:
893 for (i = 0; i < 4; i++) {
894 /* B = G = R */
895 quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
896 /* A = 1 */
897 quadColor[3][i] = 1.0F;
898 }
899 break;
900 case LUMINANCE_ALPHA:
901 for (i = 0; i < 4; i++) {
902 /* B = G = R */
903 quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
904 }
905 break;
906 case INTENSITY:
907 for (i = 0; i < 4; i++) {
908 /* A = B = G = R */
909 quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
910 }
911 break;
912 default:
913 ; /* nothing */
914 }
915 }
916
917 static void
918 blend_fallback(struct quad_stage *qs,
919 struct quad_header *quads[],
920 unsigned nr)
921 {
922 const struct blend_quad_stage *bqs = blend_quad_stage(qs);
923 struct softpipe_context *softpipe = qs->softpipe;
924 const struct pipe_blend_state *blend = softpipe->blend;
925 unsigned cbuf;
926 boolean write_all;
927
928 write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
929
930 for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
931 if (softpipe->framebuffer.cbufs[cbuf]) {
932 /* which blend/mask state index to use: */
933 const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
934 float dest[4][TGSI_QUAD_SIZE];
935 struct softpipe_cached_tile *tile
936 = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
937 quads[0]->input.x0,
938 quads[0]->input.y0);
939 const boolean clamp = bqs->clamp[cbuf];
940 const float *blend_color;
941 const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
942 uint q, i, j;
943
944 if (clamp)
945 blend_color = softpipe->blend_color_clamped.color;
946 else
947 blend_color = softpipe->blend_color.color;
948
949 for (q = 0; q < nr; q++) {
950 struct quad_header *quad = quads[q];
951 float (*quadColor)[4];
952 float (*quadColor2)[4] = NULL;
953 float temp_quad_color[TGSI_QUAD_SIZE][4];
954 const int itx = (quad->input.x0 & (TILE_SIZE-1));
955 const int ity = (quad->input.y0 & (TILE_SIZE-1));
956
957 if (write_all) {
958 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
959 for (i = 0; i < 4; i++) {
960 temp_quad_color[i][j] = quad->output.color[0][i][j];
961 }
962 }
963 quadColor = temp_quad_color;
964 } else {
965 quadColor = quad->output.color[cbuf];
966 if (dual_source_blend)
967 quadColor2 = quad->output.color[cbuf + 1];
968 }
969
970 /* If fixed-point dest color buffer, need to clamp the incoming
971 * fragment colors now.
972 */
973 if (clamp || softpipe->rasterizer->clamp_fragment_color) {
974 clamp_colors(quadColor);
975 }
976
977 /* get/swizzle dest colors
978 */
979 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
980 int x = itx + (j & 1);
981 int y = ity + (j >> 1);
982 for (i = 0; i < 4; i++) {
983 dest[i][j] = tile->data.color[y][x][i];
984 }
985 }
986
987
988 if (blend->logicop_enable) {
989 if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
990 logicop_quad( qs, quadColor, dest );
991 }
992 }
993 else if (blend->rt[blend_buf].blend_enable) {
994 blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
995
996 /* If fixed-point dest color buffer, need to clamp the outgoing
997 * fragment colors now.
998 */
999 if (clamp) {
1000 clamp_colors(quadColor);
1001 }
1002 }
1003
1004 rebase_colors(bqs->base_format[cbuf], quadColor);
1005
1006 if (blend->rt[blend_buf].colormask != 0xf)
1007 colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
1008
1009 /* Output color values
1010 */
1011 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1012 if (quad->inout.mask & (1 << j)) {
1013 int x = itx + (j & 1);
1014 int y = ity + (j >> 1);
1015 for (i = 0; i < 4; i++) { /* loop over color chans */
1016 tile->data.color[y][x][i] = quadColor[i][j];
1017 }
1018 }
1019 }
1020 }
1021 }
1022 }
1023 }
1024
1025
1026 static void
1027 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
1028 struct quad_header *quads[],
1029 unsigned nr)
1030 {
1031 const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1032 static const float one[4] = { 1, 1, 1, 1 };
1033 float one_minus_alpha[TGSI_QUAD_SIZE];
1034 float dest[4][TGSI_QUAD_SIZE];
1035 float source[4][TGSI_QUAD_SIZE];
1036 uint i, j, q;
1037
1038 struct softpipe_cached_tile *tile
1039 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1040 quads[0]->input.x0,
1041 quads[0]->input.y0);
1042
1043 for (q = 0; q < nr; q++) {
1044 struct quad_header *quad = quads[q];
1045 float (*quadColor)[4] = quad->output.color[0];
1046 const float *alpha = quadColor[3];
1047 const int itx = (quad->input.x0 & (TILE_SIZE-1));
1048 const int ity = (quad->input.y0 & (TILE_SIZE-1));
1049
1050 /* get/swizzle dest colors */
1051 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1052 int x = itx + (j & 1);
1053 int y = ity + (j >> 1);
1054 for (i = 0; i < 4; i++) {
1055 dest[i][j] = tile->data.color[y][x][i];
1056 }
1057 }
1058
1059 /* If fixed-point dest color buffer, need to clamp the incoming
1060 * fragment colors now.
1061 */
1062 if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1063 clamp_colors(quadColor);
1064 }
1065
1066 VEC4_MUL(source[0], quadColor[0], alpha); /* R */
1067 VEC4_MUL(source[1], quadColor[1], alpha); /* G */
1068 VEC4_MUL(source[2], quadColor[2], alpha); /* B */
1069 VEC4_MUL(source[3], quadColor[3], alpha); /* A */
1070
1071 VEC4_SUB(one_minus_alpha, one, alpha);
1072 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
1073 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
1074 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
1075 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
1076
1077 VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
1078 VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
1079 VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
1080 VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
1081
1082 /* If fixed-point dest color buffer, need to clamp the outgoing
1083 * fragment colors now.
1084 */
1085 if (bqs->clamp[0]) {
1086 clamp_colors(quadColor);
1087 }
1088
1089 rebase_colors(bqs->base_format[0], quadColor);
1090
1091 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1092 if (quad->inout.mask & (1 << j)) {
1093 int x = itx + (j & 1);
1094 int y = ity + (j >> 1);
1095 for (i = 0; i < 4; i++) { /* loop over color chans */
1096 tile->data.color[y][x][i] = quadColor[i][j];
1097 }
1098 }
1099 }
1100 }
1101 }
1102
1103 static void
1104 blend_single_add_one_one(struct quad_stage *qs,
1105 struct quad_header *quads[],
1106 unsigned nr)
1107 {
1108 const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1109 float dest[4][TGSI_QUAD_SIZE];
1110 uint i, j, q;
1111
1112 struct softpipe_cached_tile *tile
1113 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1114 quads[0]->input.x0,
1115 quads[0]->input.y0);
1116
1117 for (q = 0; q < nr; q++) {
1118 struct quad_header *quad = quads[q];
1119 float (*quadColor)[4] = quad->output.color[0];
1120 const int itx = (quad->input.x0 & (TILE_SIZE-1));
1121 const int ity = (quad->input.y0 & (TILE_SIZE-1));
1122
1123 /* get/swizzle dest colors */
1124 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1125 int x = itx + (j & 1);
1126 int y = ity + (j >> 1);
1127 for (i = 0; i < 4; i++) {
1128 dest[i][j] = tile->data.color[y][x][i];
1129 }
1130 }
1131
1132 /* If fixed-point dest color buffer, need to clamp the incoming
1133 * fragment colors now.
1134 */
1135 if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1136 clamp_colors(quadColor);
1137 }
1138
1139 VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
1140 VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
1141 VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
1142 VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
1143
1144 /* If fixed-point dest color buffer, need to clamp the outgoing
1145 * fragment colors now.
1146 */
1147 if (bqs->clamp[0]) {
1148 clamp_colors(quadColor);
1149 }
1150
1151 rebase_colors(bqs->base_format[0], quadColor);
1152
1153 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1154 if (quad->inout.mask & (1 << j)) {
1155 int x = itx + (j & 1);
1156 int y = ity + (j >> 1);
1157 for (i = 0; i < 4; i++) { /* loop over color chans */
1158 tile->data.color[y][x][i] = quadColor[i][j];
1159 }
1160 }
1161 }
1162 }
1163 }
1164
1165
1166 /**
1167 * Just copy the quad color to the framebuffer tile (respecting the writemask),
1168 * for one color buffer.
1169 * Clamping will be done, if needed (depending on the color buffer's
1170 * datatype) when we write/pack the colors later.
1171 */
1172 static void
1173 single_output_color(struct quad_stage *qs,
1174 struct quad_header *quads[],
1175 unsigned nr)
1176 {
1177 const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1178 uint i, j, q;
1179
1180 struct softpipe_cached_tile *tile
1181 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1182 quads[0]->input.x0,
1183 quads[0]->input.y0);
1184
1185 for (q = 0; q < nr; q++) {
1186 struct quad_header *quad = quads[q];
1187 float (*quadColor)[4] = quad->output.color[0];
1188 const int itx = (quad->input.x0 & (TILE_SIZE-1));
1189 const int ity = (quad->input.y0 & (TILE_SIZE-1));
1190
1191 if (qs->softpipe->rasterizer->clamp_fragment_color)
1192 clamp_colors(quadColor);
1193
1194 rebase_colors(bqs->base_format[0], quadColor);
1195
1196 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1197 if (quad->inout.mask & (1 << j)) {
1198 int x = itx + (j & 1);
1199 int y = ity + (j >> 1);
1200 for (i = 0; i < 4; i++) { /* loop over color chans */
1201 tile->data.color[y][x][i] = quadColor[i][j];
1202 }
1203 }
1204 }
1205 }
1206 }
1207
/**
 * Blend routine that does nothing.  choose_blend_quad() installs it when
 * there is no color buffer to write to.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* Intentionally empty: all arguments are ignored. */
   (void) qs;
   (void) quads;
   (void) nr;
}
1214
1215
1216 static void
1217 choose_blend_quad(struct quad_stage *qs,
1218 struct quad_header *quads[],
1219 unsigned nr)
1220 {
1221 struct blend_quad_stage *bqs = blend_quad_stage(qs);
1222 struct softpipe_context *softpipe = qs->softpipe;
1223 const struct pipe_blend_state *blend = softpipe->blend;
1224 unsigned i;
1225
1226 qs->run = blend_fallback;
1227
1228 if (softpipe->framebuffer.nr_cbufs == 0) {
1229 qs->run = blend_noop;
1230 }
1231 else if (!softpipe->blend->logicop_enable &&
1232 softpipe->blend->rt[0].colormask == 0xf &&
1233 softpipe->framebuffer.nr_cbufs == 1)
1234 {
1235 if (!blend->rt[0].blend_enable) {
1236 qs->run = single_output_color;
1237 }
1238 else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1239 blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1240 blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1241 {
1242 if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1243 if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1244 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1245 qs->run = blend_single_add_one_one;
1246 }
1247 else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1248 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1249 qs->run = blend_single_add_src_alpha_inv_src_alpha;
1250
1251 }
1252 }
1253 }
1254
1255 /* For each color buffer, determine if the buffer has destination alpha and
1256 * whether color clamping is needed.
1257 */
1258 for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
1259 if (softpipe->framebuffer.cbufs[i]) {
1260 const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
1261 const struct util_format_description *desc =
1262 util_format_description(format);
1263 /* assuming all or no color channels are normalized: */
1264 bqs->clamp[i] = desc->channel[0].normalized;
1265 bqs->format_type[i] = desc->channel[0].type;
1266
1267 if (util_format_is_intensity(format))
1268 bqs->base_format[i] = INTENSITY;
1269 else if (util_format_is_luminance(format))
1270 bqs->base_format[i] = LUMINANCE;
1271 else if (util_format_is_luminance_alpha(format))
1272 bqs->base_format[i] = LUMINANCE_ALPHA;
1273 else if (!util_format_has_alpha(format))
1274 bqs->base_format[i] = RGB;
1275 else
1276 bqs->base_format[i] = RGBA;
1277 }
1278 }
1279
1280 qs->run(qs, quads, nr);
1281 }
1282
1283
1284 static void blend_begin(struct quad_stage *qs)
1285 {
1286 qs->run = choose_blend_quad;
1287 }
1288
1289
/**
 * Stage destroy hook: free the stage's memory.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1294
1295
1296 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1297 {
1298 struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
1299
1300 if (!stage)
1301 return NULL;
1302
1303 stage->base.softpipe = softpipe;
1304 stage->base.begin = blend_begin;
1305 stage->base.run = choose_blend_quad;
1306 stage->base.destroy = blend_destroy;
1307
1308 return &stage->base;
1309 }