1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU per-fragment code (actually per-quad code).
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "rtasm/rtasm_ppc_spe.h"
39 #include "cell_context.h"
40 #include "cell_gen_fragment.h"
44 /** Do extra optimizations? */
45 #define OPTIMIZATIONS 1
49 * Generate SPE code to perform Z/depth testing.
51 * \param dsa Gallium depth/stencil/alpha state to gen code for
52 * \param f SPE function to append instruction onto.
53 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
54 * \param ifragZ_reg register containing integer fragment Z values (in)
55 * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
56 * \param zmask_reg register containing result of Z test/comparison (out)
59 gen_depth_test(const struct pipe_depth_stencil_alpha_state
*dsa
,
60 struct spe_function
*f
,
61 int mask_reg
, int ifragZ_reg
, int ifbZ_reg
, int zmask_reg
)
63 /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
64 * quantities. This only makes a difference for 32-bit Z values though.
66 ASSERT(dsa
->depth
.enabled
);
68 switch (dsa
->depth
.func
) {
70 /* zmask = (ifragZ == ref) */
71 spe_ceq(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
72 /* mask = (mask & zmask) */
73 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
76 case PIPE_FUNC_NOTEQUAL
:
77 /* zmask = (ifragZ == ref) */
78 spe_ceq(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
79 /* mask = (mask & ~zmask) */
80 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
83 case PIPE_FUNC_GREATER
:
84 /* zmask = (ifragZ > ref) */
85 spe_clgt(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
86 /* mask = (mask & zmask) */
87 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
91 /* zmask = (ref > ifragZ) */
92 spe_clgt(f
, zmask_reg
, ifbZ_reg
, ifragZ_reg
);
93 /* mask = (mask & zmask) */
94 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
97 case PIPE_FUNC_LEQUAL
:
98 /* zmask = (ifragZ > ref) */
99 spe_clgt(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
100 /* mask = (mask & ~zmask) */
101 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
104 case PIPE_FUNC_GEQUAL
:
105 /* zmask = (ref > ifragZ) */
106 spe_clgt(f
, zmask_reg
, ifbZ_reg
, ifragZ_reg
);
107 /* mask = (mask & ~zmask) */
108 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
111 case PIPE_FUNC_NEVER
:
112 spe_il(f
, mask_reg
, 0); /* mask = {0,0,0,0} */
113 spe_move(f
, zmask_reg
, mask_reg
); /* zmask = mask */
116 case PIPE_FUNC_ALWAYS
:
118 spe_il(f
, zmask_reg
, ~0); /* zmask = {~0,~0,~0,~0} */
126 if (dsa
->depth
.writemask
) {
128 * If (ztest passed) {
129 * framebufferZ = fragmentZ;
132 * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
134 spe_selb(f
, ifbZ_reg
, ifbZ_reg
, ifragZ_reg
, mask_reg
);
140 * Generate SPE code to perform alpha testing.
142 * \param dsa Gallium depth/stencil/alpha state to gen code for
143 * \param f SPE function to append instruction onto.
144 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
145 * \param fragA_reg register containing four fragment alpha values (in)
148 gen_alpha_test(const struct pipe_depth_stencil_alpha_state
*dsa
,
149 struct spe_function
*f
, int mask_reg
, int fragA_reg
)
151 int ref_reg
= spe_allocate_available_register(f
);
152 int amask_reg
= spe_allocate_available_register(f
);
154 ASSERT(dsa
->alpha
.enabled
);
156 if ((dsa
->alpha
.func
!= PIPE_FUNC_NEVER
) &&
157 (dsa
->alpha
.func
!= PIPE_FUNC_ALWAYS
)) {
158 /* load/splat the alpha reference float value */
159 spe_load_float(f
, ref_reg
, dsa
->alpha
.ref
);
162 /* emit code to do the alpha comparison, updating 'mask' */
163 switch (dsa
->alpha
.func
) {
164 case PIPE_FUNC_EQUAL
:
165 /* amask = (fragA == ref) */
166 spe_fceq(f
, amask_reg
, fragA_reg
, ref_reg
);
167 /* mask = (mask & amask) */
168 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
171 case PIPE_FUNC_NOTEQUAL
:
172 /* amask = (fragA == ref) */
173 spe_fceq(f
, amask_reg
, fragA_reg
, ref_reg
);
174 /* mask = (mask & ~amask) */
175 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
178 case PIPE_FUNC_GREATER
:
179 /* amask = (fragA > ref) */
180 spe_fcgt(f
, amask_reg
, fragA_reg
, ref_reg
);
181 /* mask = (mask & amask) */
182 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
186 /* amask = (ref > fragA) */
187 spe_fcgt(f
, amask_reg
, ref_reg
, fragA_reg
);
188 /* mask = (mask & amask) */
189 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
192 case PIPE_FUNC_LEQUAL
:
193 /* amask = (fragA > ref) */
194 spe_fcgt(f
, amask_reg
, fragA_reg
, ref_reg
);
195 /* mask = (mask & ~amask) */
196 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
199 case PIPE_FUNC_GEQUAL
:
200 /* amask = (ref > fragA) */
201 spe_fcgt(f
, amask_reg
, ref_reg
, fragA_reg
);
202 /* mask = (mask & ~amask) */
203 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
206 case PIPE_FUNC_NEVER
:
207 spe_il(f
, mask_reg
, 0); /* mask = [0,0,0,0] */
210 case PIPE_FUNC_ALWAYS
:
211 /* no-op, mask unchanged */
220 /* if mask == {0,0,0,0} we're all done, return */
222 /* re-use amask reg here */
223 int tmp_reg
= amask_reg
;
224 /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
225 spe_orx(f
, tmp_reg
, mask_reg
);
226 /* if tmp[0] == 0 then return from function call */
227 spe_biz(f
, tmp_reg
, SPE_REG_RA
, 0, 0);
231 spe_release_register(f
, ref_reg
);
232 spe_release_register(f
, amask_reg
);
235 /* This pair of functions is used inline to allocate and deallocate
236 * optional constant registers. Once a constant is discovered to be
237 * needed, we will likely need it again, so we don't want to deallocate
238 * it and have to allocate and load it again unnecessarily.
241 setup_const_register(struct spe_function
*f
, boolean
*is_already_set
, unsigned int *r
, float value
)
243 if (*is_already_set
) return;
244 *r
= spe_allocate_available_register(f
);
245 spe_load_float(f
, *r
, value
);
246 *is_already_set
= true;
250 release_const_register(struct spe_function
*f
, boolean
*is_already_set
, unsigned int r
)
252 if (!*is_already_set
) return;
253 spe_release_register(f
, r
);
254 *is_already_set
= false;
258 * Generate SPE code to implement the given blend mode for a quad of pixels.
259 * \param f SPE function to append instruction onto.
260 * \param fragR_reg register with fragment red values (float) (in/out)
261 * \param fragG_reg register with fragment green values (float) (in/out)
262 * \param fragB_reg register with fragment blue values (float) (in/out)
263 * \param fragA_reg register with fragment alpha values (float) (in/out)
264 * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
267 gen_blend(const struct pipe_blend_state
*blend
,
268 const struct pipe_blend_color
*blend_color
,
269 struct spe_function
*f
,
270 enum pipe_format color_format
,
271 int fragR_reg
, int fragG_reg
, int fragB_reg
, int fragA_reg
,
274 int term1R_reg
= spe_allocate_available_register(f
);
275 int term1G_reg
= spe_allocate_available_register(f
);
276 int term1B_reg
= spe_allocate_available_register(f
);
277 int term1A_reg
= spe_allocate_available_register(f
);
279 int term2R_reg
= spe_allocate_available_register(f
);
280 int term2G_reg
= spe_allocate_available_register(f
);
281 int term2B_reg
= spe_allocate_available_register(f
);
282 int term2A_reg
= spe_allocate_available_register(f
);
284 int fbR_reg
= spe_allocate_available_register(f
);
285 int fbG_reg
= spe_allocate_available_register(f
);
286 int fbB_reg
= spe_allocate_available_register(f
);
287 int fbA_reg
= spe_allocate_available_register(f
);
289 int tmp_reg
= spe_allocate_available_register(f
);
291 /* Optional constant registers we might or might not end up using;
292 * if we do use them, make sure we only allocate them once by
293 * keeping a flag on each one.
295 boolean one_reg_set
= false;
296 unsigned int one_reg
;
297 boolean constR_reg_set
= false, constG_reg_set
= false,
298 constB_reg_set
= false, constA_reg_set
= false;
299 unsigned int constR_reg
, constG_reg
, constB_reg
, constA_reg
;
301 ASSERT(blend
->blend_enable
);
303 /* Unpack/convert framebuffer colors from four 32-bit packed colors
304 * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
305 * Each 8-bit color component is expanded into a float in [0.0, 1.0].
308 int mask_reg
= spe_allocate_available_register(f
);
310 /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
311 spe_load_int(f
, mask_reg
, 0xff);
313 /* XXX there may be more clever ways to implement the following code */
314 switch (color_format
) {
315 case PIPE_FORMAT_A8R8G8B8_UNORM
:
316 /* fbB = fbB & mask */
317 spe_and(f
, fbB_reg
, fbRGBA_reg
, mask_reg
);
318 /* mask = mask << 8 */
319 spe_roti(f
, mask_reg
, mask_reg
, 8);
321 /* fbG = fbRGBA & mask */
322 spe_and(f
, fbG_reg
, fbRGBA_reg
, mask_reg
);
324 spe_roti(f
, fbG_reg
, fbG_reg
, -8);
325 /* mask = mask << 8 */
326 spe_roti(f
, mask_reg
, mask_reg
, 8);
328 /* fbR = fbRGBA & mask */
329 spe_and(f
, fbR_reg
, fbRGBA_reg
, mask_reg
);
330 /* fbR = fbR >> 16 */
331 spe_roti(f
, fbR_reg
, fbR_reg
, -16);
332 /* mask = mask << 8 */
333 spe_roti(f
, mask_reg
, mask_reg
, 8);
335 /* fbA = fbRGBA & mask */
336 spe_and(f
, fbA_reg
, fbRGBA_reg
, mask_reg
);
337 /* fbA = fbA >> 24 */
338 spe_roti(f
, fbA_reg
, fbA_reg
, -24);
341 case PIPE_FORMAT_B8G8R8A8_UNORM
:
342 /* fbA = fbA & mask */
343 spe_and(f
, fbA_reg
, fbRGBA_reg
, mask_reg
);
344 /* mask = mask << 8 */
345 spe_roti(f
, mask_reg
, mask_reg
, 8);
347 /* fbR = fbRGBA & mask */
348 spe_and(f
, fbR_reg
, fbRGBA_reg
, mask_reg
);
350 spe_roti(f
, fbR_reg
, fbR_reg
, -8);
351 /* mask = mask << 8 */
352 spe_roti(f
, mask_reg
, mask_reg
, 8);
354 /* fbG = fbRGBA & mask */
355 spe_and(f
, fbG_reg
, fbRGBA_reg
, mask_reg
);
356 /* fbG = fbG >> 16 */
357 spe_roti(f
, fbG_reg
, fbG_reg
, -16);
358 /* mask = mask << 8 */
359 spe_roti(f
, mask_reg
, mask_reg
, 8);
361 /* fbB = fbRGBA & mask */
362 spe_and(f
, fbB_reg
, fbRGBA_reg
, mask_reg
);
363 /* fbB = fbB >> 24 */
364 spe_roti(f
, fbB_reg
, fbB_reg
, -24);
371 /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
372 spe_cuflt(f
, fbR_reg
, fbR_reg
, 8);
373 spe_cuflt(f
, fbG_reg
, fbG_reg
, 8);
374 spe_cuflt(f
, fbB_reg
, fbB_reg
, 8);
375 spe_cuflt(f
, fbA_reg
, fbA_reg
, 8);
377 spe_release_register(f
, mask_reg
);
381 * Compute Src RGB terms. We're actually looking for the value
382 * of (the appropriate RGB factors) * (the incoming source RGB color),
383 * because in some cases (like PIPE_BLENDFACTOR_ONE and
384 * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
386 switch (blend
->rgb_src_factor
) {
387 case PIPE_BLENDFACTOR_ONE
:
388 /* factors = (1,1,1), so term = (R,G,B) */
389 spe_move(f
, term1R_reg
, fragR_reg
);
390 spe_move(f
, term1G_reg
, fragG_reg
);
391 spe_move(f
, term1B_reg
, fragB_reg
);
393 case PIPE_BLENDFACTOR_ZERO
:
394 /* factors = (0,0,0), so term = (0,0,0) */
395 spe_load_float(f
, term1R_reg
, 0.0f
);
396 spe_load_float(f
, term1G_reg
, 0.0f
);
397 spe_load_float(f
, term1B_reg
, 0.0f
);
399 case PIPE_BLENDFACTOR_SRC_COLOR
:
400 /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
401 spe_fm(f
, term1R_reg
, fragR_reg
, fragR_reg
);
402 spe_fm(f
, term1G_reg
, fragG_reg
, fragG_reg
);
403 spe_fm(f
, term1B_reg
, fragB_reg
, fragB_reg
);
405 case PIPE_BLENDFACTOR_SRC_ALPHA
:
406 /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
407 spe_fm(f
, term1R_reg
, fragR_reg
, fragA_reg
);
408 spe_fm(f
, term1G_reg
, fragG_reg
, fragA_reg
);
409 spe_fm(f
, term1B_reg
, fragB_reg
, fragA_reg
);
411 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
412 /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
413 * or in other words term = (R-R*R, G-G*G, B-B*B)
414 * fnms(a,b,c,d) computes a = d - b*c
416 spe_fnms(f
, term1R_reg
, fragR_reg
, fragR_reg
, fragR_reg
);
417 spe_fnms(f
, term1G_reg
, fragG_reg
, fragG_reg
, fragG_reg
);
418 spe_fnms(f
, term1B_reg
, fragB_reg
, fragB_reg
, fragB_reg
);
420 case PIPE_BLENDFACTOR_DST_COLOR
:
421 /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
422 spe_fm(f
, term1R_reg
, fragR_reg
, fbR_reg
);
423 spe_fm(f
, term1G_reg
, fragG_reg
, fbG_reg
);
424 spe_fm(f
, term1B_reg
, fragB_reg
, fbB_reg
);
426 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
427 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
428 * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
429 * fnms(a,b,c,d) computes a = d - b*c
431 spe_fnms(f
, term1R_reg
, fragR_reg
, fbR_reg
, fragR_reg
);
432 spe_fnms(f
, term1G_reg
, fragG_reg
, fbG_reg
, fragG_reg
);
433 spe_fnms(f
, term1B_reg
, fragB_reg
, fbB_reg
, fragB_reg
);
435 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
436 /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
437 * or term = (R-R*A,G-G*A,B-B*A)
438 * fnms(a,b,c,d) computes a = d - b*c
440 spe_fnms(f
, term1R_reg
, fragR_reg
, fragA_reg
, fragR_reg
);
441 spe_fnms(f
, term1G_reg
, fragG_reg
, fragA_reg
, fragG_reg
);
442 spe_fnms(f
, term1B_reg
, fragB_reg
, fragA_reg
, fragB_reg
);
444 case PIPE_BLENDFACTOR_DST_ALPHA
:
445 /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
446 spe_fm(f
, term1R_reg
, fragR_reg
, fbA_reg
);
447 spe_fm(f
, term1G_reg
, fragG_reg
, fbA_reg
);
448 spe_fm(f
, term1B_reg
, fragB_reg
, fbA_reg
);
450 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
451 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
452 * or term = (R-R*Afb,G-G*Afb,b-B*Afb)
453 * fnms(a,b,c,d) computes a = d - b*c
455 spe_fnms(f
, term1R_reg
, fragR_reg
, fbA_reg
, fragR_reg
);
456 spe_fnms(f
, term1G_reg
, fragG_reg
, fbA_reg
, fragG_reg
);
457 spe_fnms(f
, term1B_reg
, fragB_reg
, fbA_reg
, fragB_reg
);
459 case PIPE_BLENDFACTOR_CONST_COLOR
:
460 /* We need the optional constant color registers */
461 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
462 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
463 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
464 /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
465 spe_fm(f
, term1R_reg
, fragR_reg
, constR_reg
);
466 spe_fm(f
, term1G_reg
, fragG_reg
, constG_reg
);
467 spe_fm(f
, term1B_reg
, fragB_reg
, constB_reg
);
469 case PIPE_BLENDFACTOR_CONST_ALPHA
:
470 /* we'll need the optional constant alpha register */
471 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
472 /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
473 spe_fm(f
, term1R_reg
, fragR_reg
, constA_reg
);
474 spe_fm(f
, term1G_reg
, fragG_reg
, constA_reg
);
475 spe_fm(f
, term1B_reg
, fragB_reg
, constA_reg
);
477 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
478 /* We need the optional constant color registers */
479 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
480 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
481 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
482 /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
483 * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
484 * fnms(a,b,c,d) computes a = d - b*c
486 spe_fnms(f
, term1R_reg
, fragR_reg
, constR_reg
, fragR_reg
);
487 spe_fnms(f
, term1G_reg
, fragG_reg
, constG_reg
, fragG_reg
);
488 spe_fnms(f
, term1B_reg
, fragB_reg
, constB_reg
, fragB_reg
);
490 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
491 /* We need the optional constant color registers */
492 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
493 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
494 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
495 /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
496 * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
497 * fnms(a,b,c,d) computes a = d - b*c
499 spe_fnms(f
, term1R_reg
, fragR_reg
, constA_reg
, fragR_reg
);
500 spe_fnms(f
, term1G_reg
, fragG_reg
, constA_reg
, fragG_reg
);
501 spe_fnms(f
, term1B_reg
, fragB_reg
, constA_reg
, fragB_reg
);
503 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
504 /* We'll need the optional {1,1,1,1} register */
505 setup_const_register(f
, &one_reg_set
, &one_reg
, 1.0f
);
506 /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
507 * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
508 * We could expand the term (as a*min(b,c) == min(a*b,a*c)
509 * as long as a is positive), but then we'd have to do three
510 * spe_float_min() functions instead of one, so this is simpler.
513 spe_fs(f
, tmp_reg
, one_reg
, fbA_reg
);
514 /* tmp = min(A,tmp) */
515 spe_float_min(f
, tmp_reg
, fragA_reg
, tmp_reg
);
517 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
518 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
519 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
522 /* These are special D3D cases involving a second color output
523 * from the fragment shader. I'm not sure we can support them
526 case PIPE_BLENDFACTOR_SRC1_COLOR
:
527 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
528 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
529 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
536 * Compute Src Alpha term. Like the above, we're looking for
537 * the full term A*factor, not just the factor itself, because
538 * in many cases we can avoid doing unnecessary multiplies.
540 switch (blend
->alpha_src_factor
) {
541 case PIPE_BLENDFACTOR_ZERO
:
542 /* factor = 0, so term = 0 */
543 spe_load_float(f
, term1A_reg
, 0.0f
);
546 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: /* fall through */
547 case PIPE_BLENDFACTOR_ONE
:
548 /* factor = 1, so term = A */
549 spe_move(f
, term1A_reg
, fragA_reg
);
552 case PIPE_BLENDFACTOR_SRC_COLOR
:
553 /* factor = A, so term = A*A */
554 spe_fm(f
, term1A_reg
, fragA_reg
, fragA_reg
);
556 case PIPE_BLENDFACTOR_SRC_ALPHA
:
557 spe_fm(f
, term1A_reg
, fragA_reg
, fragA_reg
);
560 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: /* fall through */
561 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
562 /* factor = 1-A, so term = A*(1-A) = A-A*A */
563 /* fnms(a,b,c,d) computes a = d - b*c */
564 spe_fnms(f
, term1A_reg
, fragA_reg
, fragA_reg
, fragA_reg
);
567 case PIPE_BLENDFACTOR_DST_ALPHA
: /* fall through */
568 case PIPE_BLENDFACTOR_DST_COLOR
:
569 /* factor = Afb, so term = A*Afb */
570 spe_fm(f
, term1A_reg
, fragA_reg
, fbA_reg
);
573 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: /* fall through */
574 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
575 /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
576 /* fnms(a,b,c,d) computes a = d - b*c */
577 spe_fnms(f
, term1A_reg
, fragA_reg
, fbA_reg
, fragA_reg
);
580 case PIPE_BLENDFACTOR_CONST_ALPHA
: /* fall through */
581 case PIPE_BLENDFACTOR_CONST_COLOR
:
582 /* We need the optional constA_reg register */
583 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
584 /* factor = Ac, so term = A*Ac */
585 spe_fm(f
, term1A_reg
, fragA_reg
, constA_reg
);
588 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: /* fall through */
589 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
590 /* We need the optional constA_reg register */
591 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
592 /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
593 /* fnms(a,b,c,d) computes a = d - b*c */
594 spe_fnms(f
, term1A_reg
, fragA_reg
, constA_reg
, fragA_reg
);
597 /* These are special D3D cases involving a second color output
598 * from the fragment shader. I'm not sure we can support them
601 case PIPE_BLENDFACTOR_SRC1_COLOR
:
602 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
603 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
604 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
610 * Compute Dest RGB term. Like the above, we're looking for
611 * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
612 * in many cases we can avoid doing unnecessary multiplies.
614 switch (blend
->rgb_dst_factor
) {
615 case PIPE_BLENDFACTOR_ONE
:
616 /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
617 spe_move(f
, term2R_reg
, fbR_reg
);
618 spe_move(f
, term2G_reg
, fbG_reg
);
619 spe_move(f
, term2B_reg
, fbB_reg
);
621 case PIPE_BLENDFACTOR_ZERO
:
622 /* factor s= (0,0,0), so term = (0,0,0) */
623 spe_load_float(f
, term2R_reg
, 0.0f
);
624 spe_load_float(f
, term2G_reg
, 0.0f
);
625 spe_load_float(f
, term2B_reg
, 0.0f
);
627 case PIPE_BLENDFACTOR_SRC_COLOR
:
628 /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
629 spe_fm(f
, term2R_reg
, fbR_reg
, fragR_reg
);
630 spe_fm(f
, term2G_reg
, fbG_reg
, fragG_reg
);
631 spe_fm(f
, term2B_reg
, fbB_reg
, fragB_reg
);
633 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
634 /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
635 * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
636 * fnms(a,b,c,d) computes a = d - b*c
638 spe_fnms(f
, term2R_reg
, fragR_reg
, fbR_reg
, fbR_reg
);
639 spe_fnms(f
, term2G_reg
, fragG_reg
, fbG_reg
, fbG_reg
);
640 spe_fnms(f
, term2B_reg
, fragB_reg
, fbB_reg
, fbB_reg
);
642 case PIPE_BLENDFACTOR_SRC_ALPHA
:
643 /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
644 spe_fm(f
, term2R_reg
, fbR_reg
, fragA_reg
);
645 spe_fm(f
, term2G_reg
, fbG_reg
, fragA_reg
);
646 spe_fm(f
, term2B_reg
, fbB_reg
, fragA_reg
);
648 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
649 /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
650 /* fnms(a,b,c,d) computes a = d - b*c */
651 spe_fnms(f
, term2R_reg
, fbR_reg
, fragA_reg
, fbR_reg
);
652 spe_fnms(f
, term2G_reg
, fbG_reg
, fragA_reg
, fbG_reg
);
653 spe_fnms(f
, term2B_reg
, fbB_reg
, fragA_reg
, fbB_reg
);
655 case PIPE_BLENDFACTOR_DST_COLOR
:
656 /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
657 spe_fm(f
, term2R_reg
, fbR_reg
, fbR_reg
);
658 spe_fm(f
, term2G_reg
, fbG_reg
, fbG_reg
);
659 spe_fm(f
, term2B_reg
, fbB_reg
, fbB_reg
);
661 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
662 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
663 * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
664 * fnms(a,b,c,d) computes a = d - b*c
666 spe_fnms(f
, term2R_reg
, fbR_reg
, fbR_reg
, fbR_reg
);
667 spe_fnms(f
, term2G_reg
, fbG_reg
, fbG_reg
, fbG_reg
);
668 spe_fnms(f
, term2B_reg
, fbB_reg
, fbB_reg
, fbB_reg
);
671 case PIPE_BLENDFACTOR_DST_ALPHA
:
672 /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
673 spe_fm(f
, term2R_reg
, fbR_reg
, fbA_reg
);
674 spe_fm(f
, term2G_reg
, fbG_reg
, fbA_reg
);
675 spe_fm(f
, term2B_reg
, fbB_reg
, fbA_reg
);
677 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
678 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
679 * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
680 * fnms(a,b,c,d) computes a = d - b*c
682 spe_fnms(f
, term2R_reg
, fbR_reg
, fbA_reg
, fbR_reg
);
683 spe_fnms(f
, term2G_reg
, fbG_reg
, fbA_reg
, fbG_reg
);
684 spe_fnms(f
, term2B_reg
, fbB_reg
, fbA_reg
, fbB_reg
);
686 case PIPE_BLENDFACTOR_CONST_COLOR
:
687 /* We need the optional constant color registers */
688 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
689 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
690 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
691 /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
692 spe_fm(f
, term2R_reg
, fbR_reg
, constR_reg
);
693 spe_fm(f
, term2G_reg
, fbG_reg
, constG_reg
);
694 spe_fm(f
, term2B_reg
, fbB_reg
, constB_reg
);
696 case PIPE_BLENDFACTOR_CONST_ALPHA
:
697 /* we'll need the optional constant alpha register */
698 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
699 /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
700 spe_fm(f
, term2R_reg
, fbR_reg
, constA_reg
);
701 spe_fm(f
, term2G_reg
, fbG_reg
, constA_reg
);
702 spe_fm(f
, term2B_reg
, fbB_reg
, constA_reg
);
704 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
705 /* We need the optional constant color registers */
706 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
707 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
708 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
709 /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
710 * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
711 * fnms(a,b,c,d) computes a = d - b*c
713 spe_fnms(f
, term2R_reg
, fbR_reg
, constR_reg
, fbR_reg
);
714 spe_fnms(f
, term2G_reg
, fbG_reg
, constG_reg
, fbG_reg
);
715 spe_fnms(f
, term2B_reg
, fbB_reg
, constB_reg
, fbB_reg
);
717 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
718 /* We need the optional constant color registers */
719 setup_const_register(f
, &constR_reg_set
, &constR_reg
, blend_color
->color
[0]);
720 setup_const_register(f
, &constG_reg_set
, &constG_reg
, blend_color
->color
[1]);
721 setup_const_register(f
, &constB_reg_set
, &constB_reg
, blend_color
->color
[2]);
722 /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
723 * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
724 * fnms(a,b,c,d) computes a = d - b*c
726 spe_fnms(f
, term2R_reg
, fbR_reg
, constA_reg
, fbR_reg
);
727 spe_fnms(f
, term2G_reg
, fbG_reg
, constA_reg
, fbG_reg
);
728 spe_fnms(f
, term2B_reg
, fbB_reg
, constA_reg
, fbB_reg
);
730 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: /* not supported for dest RGB */
734 /* These are special D3D cases involving a second color output
735 * from the fragment shader. I'm not sure we can support them
738 case PIPE_BLENDFACTOR_SRC1_COLOR
:
739 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
740 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
741 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
748 * Compute Dest Alpha term. Like the above, we're looking for
749 * the full term Afb*factor, not just the factor itself, because
750 * in many cases we can avoid doing unnecessary multiplies.
752 switch (blend
->alpha_dst_factor
) {
753 case PIPE_BLENDFACTOR_ONE
:
754 /* factor = 1, so term = Afb */
755 spe_move(f
, term2A_reg
, fbA_reg
);
757 case PIPE_BLENDFACTOR_ZERO
:
758 /* factor = 0, so term = 0 */
759 spe_load_float(f
, term2A_reg
, 0.0f
);
762 case PIPE_BLENDFACTOR_SRC_ALPHA
: /* fall through */
763 case PIPE_BLENDFACTOR_SRC_COLOR
:
764 /* factor = A, so term = Afb*A */
765 spe_fm(f
, term2A_reg
, fbA_reg
, fragA_reg
);
768 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: /* fall through */
769 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
770 /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
771 /* fnms(a,b,c,d) computes a = d - b*c */
772 spe_fnms(f
, term2A_reg
, fbA_reg
, fragA_reg
, fbA_reg
);
775 case PIPE_BLENDFACTOR_DST_ALPHA
: /* fall through */
776 case PIPE_BLENDFACTOR_DST_COLOR
:
777 /* factor = Afb, so term = Afb*Afb */
778 spe_fm(f
, term2A_reg
, fbA_reg
, fbA_reg
);
781 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: /* fall through */
782 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
783 /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
784 /* fnms(a,b,c,d) computes a = d - b*c */
785 spe_fnms(f
, term2A_reg
, fbA_reg
, fbA_reg
, fbA_reg
);
788 case PIPE_BLENDFACTOR_CONST_ALPHA
: /* fall through */
789 case PIPE_BLENDFACTOR_CONST_COLOR
:
790 /* We need the optional constA_reg register */
791 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
792 /* factor = Ac, so term = Afb*Ac */
793 spe_fm(f
, term2A_reg
, fbA_reg
, constA_reg
);
796 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: /* fall through */
797 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
798 /* We need the optional constA_reg register */
799 setup_const_register(f
, &constA_reg_set
, &constA_reg
, blend_color
->color
[3]);
800 /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
801 /* fnms(a,b,c,d) computes a = d - b*c */
802 spe_fnms(f
, term2A_reg
, fbA_reg
, constA_reg
, fbA_reg
);
805 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: /* not supported for dest alpha */
809 /* These are special D3D cases involving a second color output
810 * from the fragment shader. I'm not sure we can support them
813 case PIPE_BLENDFACTOR_SRC1_COLOR
:
814 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
815 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
816 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
822 * Combine Src/Dest RGB terms as per the blend equation.
824 switch (blend
->rgb_func
) {
826 spe_fa(f
, fragR_reg
, term1R_reg
, term2R_reg
);
827 spe_fa(f
, fragG_reg
, term1G_reg
, term2G_reg
);
828 spe_fa(f
, fragB_reg
, term1B_reg
, term2B_reg
);
830 case PIPE_BLEND_SUBTRACT
:
831 spe_fs(f
, fragR_reg
, term1R_reg
, term2R_reg
);
832 spe_fs(f
, fragG_reg
, term1G_reg
, term2G_reg
);
833 spe_fs(f
, fragB_reg
, term1B_reg
, term2B_reg
);
835 case PIPE_BLEND_REVERSE_SUBTRACT
:
836 spe_fs(f
, fragR_reg
, term2R_reg
, term1R_reg
);
837 spe_fs(f
, fragG_reg
, term2G_reg
, term1G_reg
);
838 spe_fs(f
, fragB_reg
, term2B_reg
, term1B_reg
);
841 spe_float_min(f
, fragR_reg
, term1R_reg
, term2R_reg
);
842 spe_float_min(f
, fragG_reg
, term1G_reg
, term2G_reg
);
843 spe_float_min(f
, fragB_reg
, term1B_reg
, term2B_reg
);
846 spe_float_max(f
, fragR_reg
, term1R_reg
, term2R_reg
);
847 spe_float_max(f
, fragG_reg
, term1G_reg
, term2G_reg
);
848 spe_float_max(f
, fragB_reg
, term1B_reg
, term2B_reg
);
855 * Combine Src/Dest A term
857 switch (blend
->alpha_func
) {
859 spe_fa(f
, fragA_reg
, term1A_reg
, term2A_reg
);
861 case PIPE_BLEND_SUBTRACT
:
862 spe_fs(f
, fragA_reg
, term1A_reg
, term2A_reg
);
864 case PIPE_BLEND_REVERSE_SUBTRACT
:
865 spe_fs(f
, fragA_reg
, term2A_reg
, term1A_reg
);
868 spe_float_min(f
, fragA_reg
, term1A_reg
, term2A_reg
);
871 spe_float_max(f
, fragA_reg
, term1A_reg
, term2A_reg
);
877 spe_release_register(f
, term1R_reg
);
878 spe_release_register(f
, term1G_reg
);
879 spe_release_register(f
, term1B_reg
);
880 spe_release_register(f
, term1A_reg
);
882 spe_release_register(f
, term2R_reg
);
883 spe_release_register(f
, term2G_reg
);
884 spe_release_register(f
, term2B_reg
);
885 spe_release_register(f
, term2A_reg
);
887 spe_release_register(f
, fbR_reg
);
888 spe_release_register(f
, fbG_reg
);
889 spe_release_register(f
, fbB_reg
);
890 spe_release_register(f
, fbA_reg
);
892 spe_release_register(f
, tmp_reg
);
894 /* Free any optional registers that actually got used */
895 release_const_register(f
, &one_reg_set
, one_reg
);
896 release_const_register(f
, &constR_reg_set
, constR_reg
);
897 release_const_register(f
, &constG_reg_set
, constG_reg
);
898 release_const_register(f
, &constB_reg_set
, constB_reg
);
899 release_const_register(f
, &constA_reg_set
, constA_reg
);
904 gen_logicop(const struct pipe_blend_state
*blend
,
905 struct spe_function
*f
,
906 int fragRGBA_reg
, int fbRGBA_reg
)
908 /* We've got four 32-bit RGBA packed pixels in each of
909 * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
910 * reds, greens, blues, and alphas.
912 ASSERT(blend
->logicop_enable
);
914 switch(blend
->logicop_func
) {
915 case PIPE_LOGICOP_CLEAR
: /* 0 */
916 spe_zero(f
, fragRGBA_reg
);
918 case PIPE_LOGICOP_NOR
: /* ~(s | d) */
919 spe_nor(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
921 case PIPE_LOGICOP_AND_INVERTED
: /* ~s & d */
922 /* andc R, A, B computes R = A & ~B */
923 spe_andc(f
, fragRGBA_reg
, fbRGBA_reg
, fragRGBA_reg
);
925 case PIPE_LOGICOP_COPY_INVERTED
: /* ~s */
926 spe_complement(f
, fragRGBA_reg
, fragRGBA_reg
);
928 case PIPE_LOGICOP_AND_REVERSE
: /* s & ~d */
929 /* andc R, A, B computes R = A & ~B */
930 spe_andc(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
932 case PIPE_LOGICOP_INVERT
: /* ~d */
933 /* Note that (A nor A) == ~(A|A) == ~A */
934 spe_nor(f
, fragRGBA_reg
, fbRGBA_reg
, fbRGBA_reg
);
936 case PIPE_LOGICOP_XOR
: /* s ^ d */
937 spe_xor(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
939 case PIPE_LOGICOP_NAND
: /* ~(s & d) */
940 spe_nand(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
942 case PIPE_LOGICOP_AND
: /* s & d */
943 spe_and(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
945 case PIPE_LOGICOP_EQUIV
: /* ~(s ^ d) */
946 spe_xor(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
947 spe_complement(f
, fragRGBA_reg
, fragRGBA_reg
);
949 case PIPE_LOGICOP_NOOP
: /* d */
950 spe_move(f
, fragRGBA_reg
, fbRGBA_reg
);
952 case PIPE_LOGICOP_OR_INVERTED
: /* ~s | d */
953 /* orc R, A, B computes R = A | ~B */
954 spe_orc(f
, fragRGBA_reg
, fbRGBA_reg
, fragRGBA_reg
);
956 case PIPE_LOGICOP_COPY
: /* s */
958 case PIPE_LOGICOP_OR_REVERSE
: /* s | ~d */
959 /* orc R, A, B computes R = A | ~B */
960 spe_orc(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
962 case PIPE_LOGICOP_OR
: /* s | d */
963 spe_or(f
, fragRGBA_reg
, fragRGBA_reg
, fbRGBA_reg
);
965 case PIPE_LOGICOP_SET
: /* 1 */
966 spe_load_int(f
, fragRGBA_reg
, 0xffffffff);
975 * Generate code to pack a quad of float colors into four 32-bit integers.
977 * \param f SPE function to append instruction onto.
978 * \param color_format the dest color packing format
979 * \param r_reg register containing four red values (in/clobbered)
980 * \param g_reg register containing four green values (in/clobbered)
981 * \param b_reg register containing four blue values (in/clobbered)
982 * \param a_reg register containing four alpha values (in/clobbered)
983 * \param rgba_reg register to store the packed RGBA colors (out)
986 gen_pack_colors(struct spe_function
*f
,
987 enum pipe_format color_format
,
988 int r_reg
, int g_reg
, int b_reg
, int a_reg
,
991 int rg_reg
= spe_allocate_available_register(f
);
992 int ba_reg
= spe_allocate_available_register(f
);
994 /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
995 spe_cfltu(f
, r_reg
, r_reg
, 32);
996 spe_cfltu(f
, g_reg
, g_reg
, 32);
997 spe_cfltu(f
, b_reg
, b_reg
, 32);
998 spe_cfltu(f
, a_reg
, a_reg
, 32);
1000 /* Shift the most significant bytes to the least significant positions.
1001 * I.e.: reg = reg >> 24
1003 spe_rotmi(f
, r_reg
, r_reg
, -24);
1004 spe_rotmi(f
, g_reg
, g_reg
, -24);
1005 spe_rotmi(f
, b_reg
, b_reg
, -24);
1006 spe_rotmi(f
, a_reg
, a_reg
, -24);
1008 /* Shift the color bytes according to the surface format */
1009 if (color_format
== PIPE_FORMAT_A8R8G8B8_UNORM
) {
1010 spe_roti(f
, g_reg
, g_reg
, 8); /* green <<= 8 */
1011 spe_roti(f
, r_reg
, r_reg
, 16); /* red <<= 16 */
1012 spe_roti(f
, a_reg
, a_reg
, 24); /* alpha <<= 24 */
1014 else if (color_format
== PIPE_FORMAT_B8G8R8A8_UNORM
) {
1015 spe_roti(f
, r_reg
, r_reg
, 8); /* red <<= 8 */
1016 spe_roti(f
, g_reg
, g_reg
, 16); /* green <<= 16 */
1017 spe_roti(f
, b_reg
, b_reg
, 24); /* blue <<= 24 */
1023 /* Merge red, green, blue, alpha registers to make packed RGBA colors.
1024 * Eg: after shifting according to color_format we might have:
1025 * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
1026 * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
1027 * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
1028 * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
1029 * OR-ing all those together gives us four packed colors:
1030 * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
1032 spe_or(f
, rg_reg
, r_reg
, g_reg
);
1033 spe_or(f
, ba_reg
, a_reg
, b_reg
);
1034 spe_or(f
, rgba_reg
, rg_reg
, ba_reg
);
1036 spe_release_register(f
, rg_reg
);
1037 spe_release_register(f
, ba_reg
);
1041 gen_colormask(struct spe_function
*f
,
1043 enum pipe_format color_format
,
1044 int fragRGBA_reg
, int fbRGBA_reg
)
1046 /* We've got four 32-bit RGBA packed pixels in each of
1047 * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
1048 * reds, greens, blues, and alphas. Further, the pixels
1049 * are packed according to the given color format, not
1050 * necessarily RGBA...
1052 unsigned int r_mask
;
1053 unsigned int g_mask
;
1054 unsigned int b_mask
;
1055 unsigned int a_mask
;
1057 /* Calculate exactly where the bits for any particular color
1058 * end up, so we can mask them correctly.
1060 switch(color_format
) {
1061 case PIPE_FORMAT_A8R8G8B8_UNORM
:
1063 a_mask
= 0xff000000;
1064 r_mask
= 0x00ff0000;
1065 g_mask
= 0x0000ff00;
1066 b_mask
= 0x000000ff;
1068 case PIPE_FORMAT_B8G8R8A8_UNORM
:
1070 b_mask
= 0xff000000;
1071 g_mask
= 0x00ff0000;
1072 r_mask
= 0x0000ff00;
1073 a_mask
= 0x000000ff;
1079 /* For each R, G, B, and A component we're supposed to mask out,
1080 * clear its bits. Then our mask operation later will work
1083 if (!(colormask
& PIPE_MASK_R
)) {
1086 if (!(colormask
& PIPE_MASK_G
)) {
1089 if (!(colormask
& PIPE_MASK_B
)) {
1092 if (!(colormask
& PIPE_MASK_A
)) {
1096 /* Get a temporary register to hold the mask that will be applied to the fragment */
1097 int colormask_reg
= spe_allocate_available_register(f
);
1099 /* The actual mask we're going to use is an OR of the remaining R, G, B, and A
1100 * masks. Load the result value into our temporary register.
1102 spe_load_uint(f
, colormask_reg
, r_mask
| g_mask
| b_mask
| a_mask
);
1104 /* Use the mask register to select between the fragment color
1105 * values and the frame buffer color values. Wherever the
1106 * mask has a 0 bit, the current frame buffer color should override
1107 * the fragment color. Wherever the mask has a 1 bit, the
1108 * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM)
1109 * instruction will select bits from its first operand rA wherever the
1110 * the mask bits rM are 0, and from its second operand rB wherever the
1111 * mask bits rM are 1. That means that the frame buffer color is the
1112 * first operand, and the fragment color the second.
1114 spe_selb(f
, fragRGBA_reg
, fbRGBA_reg
, fragRGBA_reg
, colormask_reg
);
1116 /* Release the temporary register and we're done */
1117 spe_release_register(f
, colormask_reg
);
1121 * Generate SPE code to implement the fragment operations (alpha test,
1122 * depth test, stencil test, blending, colormask, and final
1123 * framebuffer write) as specified by the current context state.
1125 * Logically, this code will be called after running the fragment
1126 * shader. But under some circumstances we could run some of this
1127 * code before the fragment shader to cull fragments/quads that are
1128 * totally occluded/discarded.
1130 * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
1132 * See the spu_default_fragment_ops() function to see how the per-fragment
1133 * operations would be done with ordinary C code.
1134 * The code we generate here though has no branches, is SIMD, etc and
1135 * should be much faster.
1137 * \param cell the rendering context (in)
1138 * \param f the generated function (out)
1141 cell_gen_fragment_function(struct cell_context
*cell
, struct spe_function
*f
)
1143 const struct pipe_depth_stencil_alpha_state
*dsa
= cell
->depth_stencil
;
1144 const struct pipe_blend_state
*blend
= cell
->blend
;
1145 const struct pipe_blend_color
*blend_color
= &cell
->blend_color
;
1146 const enum pipe_format color_format
= cell
->framebuffer
.cbufs
[0]->format
;
1148 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1149 const int x_reg
= 3; /* uint */
1150 const int y_reg
= 4; /* uint */
1151 const int color_tile_reg
= 5; /* tile_t * */
1152 const int depth_tile_reg
= 6; /* tile_t * */
1153 const int fragZ_reg
= 7; /* vector float */
1154 const int fragR_reg
= 8; /* vector float */
1155 const int fragG_reg
= 9; /* vector float */
1156 const int fragB_reg
= 10; /* vector float */
1157 const int fragA_reg
= 11; /* vector float */
1158 const int mask_reg
= 12; /* vector uint */
1160 /* offset of quad from start of tile
1161 * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
1163 int quad_offset_reg
;
1165 int fbRGBA_reg
; /**< framebuffer's RGBA colors for quad */
1166 int fbZS_reg
; /**< framebuffer's combined z/stencil values for quad */
1168 spe_init_func(f
, SPU_MAX_FRAGMENT_OPS_INSTS
* SPE_INST_SIZE
);
1170 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1171 spe_print_code(f
, true);
1173 spe_comment(f
, -4, "Begin per-fragment ops");
1176 spe_allocate_register(f
, x_reg
);
1177 spe_allocate_register(f
, y_reg
);
1178 spe_allocate_register(f
, color_tile_reg
);
1179 spe_allocate_register(f
, depth_tile_reg
);
1180 spe_allocate_register(f
, fragZ_reg
);
1181 spe_allocate_register(f
, fragR_reg
);
1182 spe_allocate_register(f
, fragG_reg
);
1183 spe_allocate_register(f
, fragB_reg
);
1184 spe_allocate_register(f
, fragA_reg
);
1185 spe_allocate_register(f
, mask_reg
);
1187 quad_offset_reg
= spe_allocate_available_register(f
);
1188 fbRGBA_reg
= spe_allocate_available_register(f
);
1189 fbZS_reg
= spe_allocate_available_register(f
);
1191 /* compute offset of quad from start of tile, in bytes */
1193 int x2_reg
= spe_allocate_available_register(f
);
1194 int y2_reg
= spe_allocate_available_register(f
);
1196 ASSERT(TILE_SIZE
== 32);
1198 spe_rotmi(f
, y2_reg
, y_reg
, -1); /* y2 = y / 2 */
1199 spe_rotmi(f
, x2_reg
, x_reg
, -1); /* x2 = x / 2 */
1200 spe_shli(f
, y2_reg
, y2_reg
, 4); /* y2 *= 16 */
1201 spe_a(f
, quad_offset_reg
, y2_reg
, x2_reg
); /* offset = y2 + x2 */
1202 spe_shli(f
, quad_offset_reg
, quad_offset_reg
, 4); /* offset *= 16 */
1204 spe_release_register(f
, x2_reg
);
1205 spe_release_register(f
, y2_reg
);
1209 if (dsa
->alpha
.enabled
) {
1210 gen_alpha_test(dsa
, f
, mask_reg
, fragA_reg
);
1213 if (dsa
->depth
.enabled
|| dsa
->stencil
[0].enabled
) {
1214 const enum pipe_format zs_format
= cell
->framebuffer
.zsbuf
->format
;
1215 boolean write_depth_stencil
;
1217 int fbZ_reg
= spe_allocate_available_register(f
); /* Z values */
1218 int fbS_reg
= spe_allocate_available_register(f
); /* Stencil values */
1220 /* fetch quad of depth/stencil values from tile at (x,y) */
1221 /* Load: fbZS_reg = memory[depth_tile_reg + quad_offset_reg] */
1222 spe_lqx(f
, fbZS_reg
, depth_tile_reg
, quad_offset_reg
);
1224 if (dsa
->depth
.enabled
) {
1225 /* Extract Z bits from fbZS_reg into fbZ_reg */
1226 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1227 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1228 int mask_reg
= spe_allocate_available_register(f
);
1229 spe_fsmbi(f
, mask_reg
, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
1230 spe_and(f
, fbZ_reg
, fbZS_reg
, mask_reg
); /* fbZ = fbZS & mask */
1231 spe_release_register(f
, mask_reg
);
1232 /* OK, fbZ_reg has four 24-bit Z values now */
1234 else if (zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
1235 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
1236 spe_rotmi(f
, fbZ_reg
, fbZS_reg
, -8); /* fbZ = fbZS >> 8 */
1237 /* OK, fbZ_reg has four 24-bit Z values now */
1239 else if (zs_format
== PIPE_FORMAT_Z32_UNORM
) {
1240 spe_move(f
, fbZ_reg
, fbZS_reg
);
1241 /* OK, fbZ_reg has four 32-bit Z values now */
1243 else if (zs_format
== PIPE_FORMAT_Z16_UNORM
) {
1244 spe_move(f
, fbZ_reg
, fbZS_reg
);
1245 /* OK, fbZ_reg has four 16-bit Z values now */
1248 ASSERT(0); /* invalid format */
1251 /* Convert fragZ values from float[4] to 16, 24 or 32-bit uint[4] */
1252 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1253 zs_format
== PIPE_FORMAT_X8Z24_UNORM
||
1254 zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
1255 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
1256 /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */
1257 spe_cfltu(f
, fragZ_reg
, fragZ_reg
, 32);
1258 /* fragZ = fragZ >> 8 */
1259 spe_rotmi(f
, fragZ_reg
, fragZ_reg
, -8);
1261 else if (zs_format
== PIPE_FORMAT_Z32_UNORM
) {
1262 /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */
1263 spe_cfltu(f
, fragZ_reg
, fragZ_reg
, 32);
1265 else if (zs_format
== PIPE_FORMAT_Z16_UNORM
) {
1266 /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */
1267 spe_cfltu(f
, fragZ_reg
, fragZ_reg
, 32);
1268 /* fragZ = fragZ >> 16 */
1269 spe_rotmi(f
, fragZ_reg
, fragZ_reg
, -16);
1273 /* no Z test, but set Z to zero so we don't OR-in garbage below */
1274 spe_load_uint(f
, fbZ_reg
, 0); /* XXX set to zero for now */
1278 if (dsa
->stencil
[0].enabled
) {
1279 /* Extract Stencil bits from fbZS_reg into fbS_reg */
1280 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1281 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1282 /* XXX extract with a shift */
1285 else if (zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
1286 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
1287 /* XXX extract with a mask */
1292 /* no stencil test, but set to zero so we don't OR-in garbage below */
1293 spe_load_uint(f
, fbS_reg
, 0); /* XXX set to zero for now */
1296 if (dsa
->stencil
[0].enabled
) {
1297 /* XXX this may involve depth testing too */
1298 // gen_stencil_test(dsa, f, ... );
1301 else if (dsa
->depth
.enabled
) {
1302 int zmask_reg
= spe_allocate_available_register(f
);
1303 gen_depth_test(dsa
, f
, mask_reg
, fragZ_reg
, fbZ_reg
, zmask_reg
);
1304 spe_release_register(f
, zmask_reg
);
1307 /* do we need to write Z and/or Stencil back into framebuffer? */
1308 write_depth_stencil
= (dsa
->depth
.writemask
|
1309 dsa
->stencil
[0].write_mask
|
1310 dsa
->stencil
[1].write_mask
);
1312 if (write_depth_stencil
) {
1313 /* Merge latest Z and Stencil values into fbZS_reg.
1314 * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
1315 * fbS_reg has four 8-bit stencil values in bits [7..0].
1317 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1318 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1319 spe_shli(f
, fbS_reg
, fbS_reg
, 24); /* fbS = fbS << 24 */
1320 spe_or(f
, fbZS_reg
, fbS_reg
, fbZ_reg
); /* fbZS = fbS | fbZ */
1322 else if (zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
1323 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
1324 spe_shli(f
, fbZ_reg
, fbZ_reg
, 8); /* fbZ = fbZ << 8 */
1325 spe_or(f
, fbZS_reg
, fbS_reg
, fbZ_reg
); /* fbZS = fbS | fbZ */
1327 else if (zs_format
== PIPE_FORMAT_Z32_UNORM
) {
1328 spe_move(f
, fbZS_reg
, fbZ_reg
); /* fbZS = fbZ */
1330 else if (zs_format
== PIPE_FORMAT_Z16_UNORM
) {
1331 spe_move(f
, fbZS_reg
, fbZ_reg
); /* fbZS = fbZ */
1333 else if (zs_format
== PIPE_FORMAT_S8_UNORM
) {
1334 ASSERT(0); /* XXX to do */
1337 ASSERT(0); /* bad zs_format */
1340 /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
1341 spe_stqx(f
, fbZS_reg
, depth_tile_reg
, quad_offset_reg
);
1344 spe_release_register(f
, fbZ_reg
);
1345 spe_release_register(f
, fbS_reg
);
1349 /* Get framebuffer quad/colors. We'll need these for blending,
1350 * color masking, and to obey the quad/pixel mask.
1351 * Load: fbRGBA_reg = memory[color_tile + quad_offset]
1352 * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
1353 * we could skip this load.
1355 spe_lqx(f
, fbRGBA_reg
, color_tile_reg
, quad_offset_reg
);
1358 if (blend
->blend_enable
) {
1359 gen_blend(blend
, blend_color
, f
, color_format
,
1360 fragR_reg
, fragG_reg
, fragB_reg
, fragA_reg
, fbRGBA_reg
);
1364 * Write fragment colors to framebuffer/tile.
1365 * This involves converting the fragment colors from float[4] to the
1366 * tile's specific format and obeying the quad/pixel mask.
1369 int rgba_reg
= spe_allocate_available_register(f
);
1371 /* Pack four float colors as four 32-bit int colors */
1372 gen_pack_colors(f
, color_format
,
1373 fragR_reg
, fragG_reg
, fragB_reg
, fragA_reg
,
1376 if (blend
->logicop_enable
) {
1377 gen_logicop(blend
, f
, rgba_reg
, fbRGBA_reg
);
1380 if (blend
->colormask
!= PIPE_MASK_RGBA
) {
1381 gen_colormask(f
, blend
->colormask
, color_format
, rgba_reg
, fbRGBA_reg
);
1385 /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
1387 * rgba[i] = rgba[i];
1389 * rgba[i] = framebuffer[i];
1391 spe_selb(f
, rgba_reg
, fbRGBA_reg
, rgba_reg
, mask_reg
);
1393 /* Store updated quad in tile:
1394 * memory[color_tile + quad_offset] = rgba_reg;
1396 spe_stqx(f
, rgba_reg
, color_tile_reg
, quad_offset_reg
);
1398 spe_release_register(f
, rgba_reg
);
1401 //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
1403 spe_bi(f
, SPE_REG_RA
, 0, 0); /* return from function call */
1405 spe_release_register(f
, fbRGBA_reg
);
1406 spe_release_register(f
, fbZS_reg
);
1407 spe_release_register(f
, quad_offset_reg
);
1409 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1410 spe_comment(f
, -4, "End per-fragment ops");