int tmp_a = spe_allocate_available_register(f);
int tmp_b = spe_allocate_available_register(f);
union {
- float f;
- unsigned u;
+ float f;
+ unsigned u;
} ref_val;
boolean complement = FALSE;
switch (dsa->alpha.func) {
case PIPE_FUNC_NOTEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
+ complement = TRUE;
+ /* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
- spe_fceq(f, tmp_a, ref, alphas);
- break;
+ spe_fceq(f, tmp_a, ref, alphas);
+ break;
case PIPE_FUNC_LEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
+ complement = TRUE;
+ /* FALLTHROUGH */
case PIPE_FUNC_GREATER:
- spe_fcgt(f, tmp_a, ref, alphas);
- break;
+ spe_fcgt(f, tmp_a, ref, alphas);
+ break;
case PIPE_FUNC_LESS:
- complement = TRUE;
- /* FALLTHROUGH */
+ complement = TRUE;
+ /* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
- spe_fcgt(f, tmp_a, ref, alphas);
- spe_fceq(f, tmp_b, ref, alphas);
- spe_or(f, tmp_a, tmp_b, tmp_a);
- break;
+ spe_fcgt(f, tmp_a, ref, alphas);
+ spe_fceq(f, tmp_b, ref, alphas);
+ spe_or(f, tmp_a, tmp_b, tmp_a);
+ break;
case PIPE_FUNC_ALWAYS:
case PIPE_FUNC_NEVER:
default:
- assert(0);
- break;
+ assert(0);
+ break;
}
if (complement) {
- spe_andc(f, mask, mask, tmp_a);
+ spe_andc(f, mask, mask, tmp_a);
} else {
- spe_and(f, mask, mask, tmp_a);
+ spe_and(f, mask, mask, tmp_a);
}
spe_release_register(f, ref);
/**
+ * Generate code to perform Z testing. Four Z values are tested at once.
* \param dsa Current depth-test state
* \param f Function to which code should be appended
- * \param m Mask of allocated / free SPE registers
* \param mask Index of register to contain depth-pass mask
* \param stored Index of register containing values from depth buffer
* \param calculated Index of register containing per-fragment depth values
/**
+ * Generate code to apply the stencil operation (after testing).
* \note Emits a maximum of 5 instructions.
+ *
+ * \warning
+ * Since \c out and \c in might be the same register, this routine cannot
+ * generate code that uses \c out as a temporary.
*/
static void
emit_stencil_op(struct spe_function *f,
int out, int in, int mask, unsigned op, unsigned ref)
{
const int clamp = spe_allocate_available_register(f);
- const int tmp = spe_allocate_available_register(f);
+ const int clamp_mask = spe_allocate_available_register(f);
+ const int result = spe_allocate_available_register(f);
switch(op) {
case PIPE_STENCIL_OP_KEEP:
assert(0);
case PIPE_STENCIL_OP_ZERO:
- spe_il(f, out, 0);
+ spe_il(f, result, 0);
break;
case PIPE_STENCIL_OP_REPLACE:
- spe_il(f, out, ref);
+ spe_il(f, result, ref);
break;
case PIPE_STENCIL_OP_INCR:
+ /* clamp = [0xff, 0xff, 0xff, 0xff] */
spe_il(f, clamp, 0x0ff);
- spe_ai(f, out, in, 1);
- spe_cgti(f, tmp, out, clamp);
- spe_selb(f, out, out, clamp, tmp);
+ /* result[i] = in[i] + 1 */
+ spe_ai(f, result, in, 1);
+ /* clamp_mask[i] = (result[i] > 0xff) */
+ spe_clgti(f, clamp_mask, result, 0x0ff);
+ /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
+ spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_DECR:
spe_il(f, clamp, 0);
- spe_ai(f, out, in, -1);
- spe_cgti(f, tmp, out, clamp);
- spe_selb(f, out, clamp, out, tmp);
+ spe_ai(f, result, in, -1);
+
+ /* If "(s-1) < 0" in signed arithmetic, then "(s-1) > MAX" in unsigned
+ * arithmetic.
+ */
+ spe_clgti(f, clamp_mask, result, 0x0ff);
+ spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_INCR_WRAP:
- spe_ai(f, out, in, 1);
+ spe_ai(f, result, in, 1);
break;
case PIPE_STENCIL_OP_DECR_WRAP:
- spe_ai(f, out, in, -1);
+ spe_ai(f, result, in, -1);
break;
case PIPE_STENCIL_OP_INVERT:
- spe_nor(f, out, in, in);
+ spe_nor(f, result, in, in);
break;
default:
assert(0);
}
- spe_release_register(f, tmp);
- spe_release_register(f, clamp);
+ spe_selb(f, out, in, result, mask);
- spe_selb(f, out, in, out, mask);
+ spe_release_register(f, result);
+ spe_release_register(f, clamp_mask);
+ spe_release_register(f, clamp);
}
/**
+ * Generate code to do stencil test. Four pixels are tested at once.
* \param dsa Depth / stencil test state
* \param face 0 for front face, 1 for back face
* \param f Function to append instructions to
- * \param reg_mask Mask of allocated registers
* \param mask Register containing mask of fragments passing the
* alpha test
* \param depth_mask Register containing mask of fragments passing the
* \param stencil Register containing values from stencil buffer
* \param depth_pass Register to store mask of fragments passing stencil test
* and depth test
- *
+ *
* \note
* Emits a maximum of 10 + (3 * 5) = 25 instructions.
*/
static int
emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
- unsigned face,
+ struct pipe_stencil_ref *sr,
+ unsigned face,
struct spe_function *f,
int mask,
int depth_mask,
int stencil_pass = spe_allocate_available_register(f);
int face_stencil = spe_allocate_available_register(f);
int stencil_src = stencil;
- const unsigned ref = (dsa->stencil[face].ref_value
- & dsa->stencil[face].value_mask);
+ const unsigned ref = (sr->ref_value[face]
+ & dsa->stencil[face].valuemask);
boolean complement = FALSE;
- int stored = spe_allocate_available_register(f);
+ int stored;
int tmp = spe_allocate_available_register(f);
if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
&& (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
- && (dsa->stencil[face].value_mask != 0x0ff)) {
- spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
+ && (dsa->stencil[face].valuemask != 0x0ff)) {
+ stored = spe_allocate_available_register(f);
+ spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
+ } else {
+ stored = stencil;
}
switch (dsa->stencil[face].func) {
case PIPE_FUNC_NEVER:
- spe_il(f, stencil_mask, 0);
+ spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
break;
case PIPE_FUNC_NOTEQUAL:
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
+ /* stencil_mask[i] = (stored[i] == ref) */
spe_ceqi(f, stencil_mask, stored, ref);
break;
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GREATER:
+ complement = TRUE;
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
break;
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
+ /* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
+ /* tmp[i] = (stored[i] == ref) */
spe_ceqi(f, tmp, stored, ref);
+ /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
spe_or(f, stencil_mask, stencil_mask, tmp);
break;
break;
}
- spe_release_register(f, stored);
+ if (stored != stencil) {
+ spe_release_register(f, stored);
+ }
spe_release_register(f, tmp);
/* Conditionally emit code to update the stencil value under various
* condititons. Note that there is no need to generate code under the
* following circumstances:
- *
+ *
* - Stencil write mask is zero.
* - For stencil-fail if the stencil test is ALWAYS
* - For depth-fail if the stencil test is NEVER
* - For depth-pass if the stencil test is NEVER
* - Any of the 3 conditions if the operation is KEEP
*/
- if (dsa->stencil[face].write_mask != 0) {
+ if (dsa->stencil[face].writemask != 0) {
if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
&& (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
if (complement) {
emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
dsa->stencil[face].fail_op,
- dsa->stencil[face].ref_value);
+ sr->ref_value[face]);
stencil_src = face_stencil;
}
emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
dsa->stencil[face].zfail_op,
- dsa->stencil[face].ref_value);
+ sr->ref_value[face]);
stencil_src = face_stencil;
}
&& (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
dsa->stencil[face].zpass_op,
- dsa->stencil[face].ref_value);
+ sr->ref_value[face]);
stencil_src = face_stencil;
}
}
*/
if (stencil_src == stencil) {
spe_release_register(f, face_stencil);
- } else if (dsa->stencil[face].write_mask != 0x0ff) {
+ } else if (dsa->stencil[face].writemask != 0x0ff) {
int tmp = spe_allocate_available_register(f);
-
- spe_il(f, tmp, dsa->stencil[face].write_mask);
+
+ spe_il(f, tmp, dsa->stencil[face].writemask);
spe_selb(f, stencil_src, stencil, stencil_src, tmp);
spe_release_register(f, tmp);
void
-cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
+cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
+ struct pipe_stencil_ref *sr)
{
struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
struct spe_function *const f = &cdsa->code;
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
* up to 64 to make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
/* Allocate registers for the function's input parameters. Cleverly (and
if (dsa->stencil[0].enabled) {
const int front_depth_pass = spe_allocate_available_register(f);
- int front_stencil = emit_stencil_test(dsa, 0, f, mask,
+ int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
depth_mask, depth_complement,
stencil, front_depth_pass);
if (dsa->stencil[1].enabled) {
const int back_depth_pass = spe_allocate_available_register(f);
- int back_stencil = emit_stencil_test(dsa, 1, f, mask,
+ int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
depth_mask, depth_complement,
stencil, back_depth_pass);
if (front_stencil != back_stencil) {
spe_selb(f, stencil, back_stencil, front_stencil, facing);
}
-
- if (back_stencil != stencil) {
+
+ if (back_stencil != stencil) {
spe_release_register(f, back_stencil);
}
- if (front_stencil != stencil) {
+ if (front_stencil != stencil) {
spe_release_register(f, front_stencil);
}
spe_release_register(f, back_depth_pass);
} else {
- if (front_stencil != stencil) {
+ if (front_stencil != stencil) {
spe_or(f, stencil, front_stencil, front_stencil);
spe_release_register(f, front_stencil);
}
+ spe_or(f, mask, front_depth_pass, front_depth_pass);
}
spe_release_register(f, front_depth_pass);
spe_selb(f, depth, depth, zvals, mask);
}
- spe_bi(f, 0, 0, 0);
+ spe_bi(f, 0, 0, 0); /* return from function call */
#if 0
dsa->stencil[i].zfail_op,
dsa->stencil[i].zpass_op);
printf("# ref value / value mask / write mask: %02x %02x %02x\n",
- dsa->stencil[i].ref_value,
- dsa->stencil[i].value_mask,
- dsa->stencil[i].write_mask);
+ sr->ref_value[i],
+ dsa->stencil[i].valuemask,
+ dsa->stencil[i].writemask);
}
printf("\t.text\n");
*/
static int
emit_alpha_factor_calculation(struct spe_function *f,
- unsigned factor, float const_alpha,
- int src_alpha, int dst_alpha)
+ unsigned factor,
+ int src_alpha, int dst_alpha, int const_alpha)
{
- union {
- float f;
- unsigned u;
- } alpha;
int factor_reg;
int tmp;
- alpha.f = const_alpha;
-
switch (factor) {
case PIPE_BLENDFACTOR_ONE:
factor_reg = -1;
break;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- const_alpha = 1.0 - const_alpha;
- /* FALLTHROUGH */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
factor_reg = spe_allocate_available_register(f);
- spe_il(f, factor_reg, alpha.u & 0x0ffff);
- spe_ilh(f, factor_reg, alpha.u >> 16);
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor_reg, tmp, const_alpha);
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ factor_reg = const_alpha;
break;
case PIPE_BLENDFACTOR_ZERO:
/**
- * \note Emits a maximum of 5 instructions
+ * \note Emits a maximum of 6 instructions
*/
static void
emit_color_factor_calculation(struct spe_function *f,
unsigned sF, unsigned mask,
- const struct pipe_blend_color *blend_color,
const int *src,
const int *dst,
+ const int *const_color,
int *factor)
{
- union {
- float f[4];
- unsigned u[4];
- } color;
int tmp;
unsigned i;
- color.f[0] = blend_color->color[0];
- color.f[1] = blend_color->color[1];
- color.f[2] = blend_color->color[2];
- color.f[3] = blend_color->color[3];
-
factor[0] = -1;
factor[1] = -1;
factor[2] = -1;
break;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- color.f[0] = 1.0 - color.f[0];
- color.f[1] = 1.0 - color.f[1];
- color.f[2] = 1.0 - color.f[2];
- /* FALLTHROUGH */
- case PIPE_BLENDFACTOR_CONST_COLOR:
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+
for (i = 0; i < 3; i++) {
- factor[i] = spe_allocate_available_register(f);
+ factor[i] = spe_allocate_available_register(f);
+
+ spe_fs(f, factor[i], tmp, const_color[i]);
+ }
+ spe_release_register(f, tmp);
+ break;
- spe_il(f, factor[i], color.u[i] & 0x0ffff);
- spe_ilh(f, factor[i], color.u[i] >> 16);
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ for (i = 0; i < 3; i++) {
+ factor[i] = const_color[i];
}
break;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- color.f[3] = 1.0 - color.f[3];
- /* FALLTHROUGH */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
factor[0] = spe_allocate_available_register(f);
factor[1] = factor[0];
factor[2] = factor[0];
- spe_il(f, factor[0], color.u[3] & 0x0ffff);
- spe_ilh(f, factor[0], color.u[3] >> 16);
+ tmp = spe_allocate_available_register(f);
+ spe_il(f, tmp, 1);
+ spe_cuflt(f, tmp, tmp, 0);
+ spe_fs(f, factor[0], tmp, const_color[3]);
+ spe_release_register(f, tmp);
+ break;
+
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ factor[0] = const_color[3];
+ factor[1] = factor[0];
+ factor[2] = factor[0];
break;
case PIPE_BLENDFACTOR_ZERO:
spe_il(f, src, 0);
} else if (dF == PIPE_BLENDFACTOR_ONE) {
spe_or(f, src, dst, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
}
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
} else {
spe_fm(f, tmp, dst, dst_factor);
spe_fma(f, src, src, src_factor, tmp);
} else if (dF == PIPE_BLENDFACTOR_ONE) {
spe_il(f, tmp, 0);
spe_fs(f, src, tmp, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
}
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
} else {
spe_fm(f, tmp, dst, dst_factor);
spe_fms(f, src, src, src_factor, tmp);
spe_il(f, src, 0);
} else if (dF == PIPE_BLENDFACTOR_ONE) {
spe_or(f, src, dst, dst);
+ } else {
+ spe_fm(f, src, dst, dst_factor);
}
+ } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+ spe_fm(f, src, src, src_factor);
} else {
spe_fm(f, tmp, src, src_factor);
spe_fms(f, src, src, dst_factor, tmp);
case PIPE_BLEND_MIN:
spe_cgt(f, tmp, src, dst);
- spe_selb(f, src, dst, src, tmp);
+ spe_selb(f, src, src, dst, tmp);
break;
case PIPE_BLEND_MAX:
spe_cgt(f, tmp, src, dst);
- spe_selb(f, src, src, dst, tmp);
+ spe_selb(f, src, dst, src, tmp);
break;
default:
* Generate code to perform alpha blending on the SPE
*/
void
-cell_generate_alpha_blend(struct cell_blend_state *cb,
- const struct pipe_blend_color *blend_color)
+cell_generate_alpha_blend(struct cell_blend_state *cb)
{
struct pipe_blend_state *const b = &cb->base;
struct spe_function *const f = &cb->code;
/* This code generates a maximum of 3 (source alpha factor)
- * + 3 (destination alpha factor) + (3 * 5) (source color factor)
- * + (3 * 5) (destination color factor) + (4 * 2) (blend equation)
- * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to
+ * + 3 (destination alpha factor) + (3 * 6) (source color factor)
+ * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
+ * + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to
* make it a happy power-of-two.
*/
- spe_init_func(f, 4 * 64);
+ spe_init_func(f, SPE_INST_SIZE * 64);
const int frag[4] = {
spe_allocate_register(f, 9),
spe_allocate_register(f, 10),
};
- const int mask = spe_allocate_register(f, 11);
+ const int const_color[4] = {
+ spe_allocate_register(f, 11),
+ spe_allocate_register(f, 12),
+ spe_allocate_register(f, 13),
+ spe_allocate_register(f, 14),
+ };
unsigned func[4];
unsigned sF[4];
unsigned dF[4];
/* Does the selected blend mode make use of the source / destination
* color (RGB) blend factors?
*/
- boolean need_color_factor = b->blend_enable
- && (b->rgb_func != PIPE_BLEND_MIN)
- && (b->rgb_func != PIPE_BLEND_MAX);
+ boolean need_color_factor = b->rt[0].blend_enable
+ && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
+ && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
/* Does the selected blend mode make use of the source / destination
* alpha blend factors?
*/
- boolean need_alpha_factor = b->blend_enable
- && (b->alpha_func != PIPE_BLEND_MIN)
- && (b->alpha_func != PIPE_BLEND_MAX);
-
+ boolean need_alpha_factor = b->rt[0].blend_enable
+ && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
+ && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
+
+
+ if (b->rt[0].blend_enable) {
+ sF[0] = b->rt[0].rgb_src_factor;
+ sF[1] = sF[0];
+ sF[2] = sF[0];
+ switch (b->rt[0].alpha_src_factor & 0x0f) {
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ sF[3] = PIPE_BLENDFACTOR_ONE;
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ sF[3] = b->rt[0].alpha_src_factor + 1;
+ break;
+ default:
+ sF[3] = b->rt[0].alpha_src_factor;
+ }
- sF[0] = b->rgb_src_factor;
- sF[1] = sF[0];
- sF[2] = sF[0];
- sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
- ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor;
+ dF[0] = b->rt[0].rgb_dst_factor;
+ dF[1] = dF[0];
+ dF[2] = dF[0];
+ switch (b->rt[0].alpha_dst_factor & 0x0f) {
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ dF[3] = b->rt[0].alpha_dst_factor + 1;
+ break;
+ default:
+ dF[3] = b->rt[0].alpha_dst_factor;
+ }
- dF[0] = b->rgb_dst_factor;
- dF[1] = dF[0];
- dF[2] = dF[0];
- dF[3] = b->rgb_dst_factor;
+ func[0] = b->rt[0].rgb_func;
+ func[1] = func[0];
+ func[2] = func[0];
+ func[3] = b->rt[0].alpha_func;
+ } else {
+ sF[0] = PIPE_BLENDFACTOR_ONE;
+ sF[1] = PIPE_BLENDFACTOR_ONE;
+ sF[2] = PIPE_BLENDFACTOR_ONE;
+ sF[3] = PIPE_BLENDFACTOR_ONE;
+ dF[0] = PIPE_BLENDFACTOR_ZERO;
+ dF[1] = PIPE_BLENDFACTOR_ZERO;
+ dF[2] = PIPE_BLENDFACTOR_ZERO;
+ dF[3] = PIPE_BLENDFACTOR_ZERO;
+
+ func[0] = PIPE_BLEND_ADD;
+ func[1] = PIPE_BLEND_ADD;
+ func[2] = PIPE_BLEND_ADD;
+ func[3] = PIPE_BLEND_ADD;
+ }
/* If alpha writing is enabled and the alpha blend mode requires use of
* the alpha factor, calculate the alpha factor.
*/
- if (((b->colormask & 8) != 0) && need_alpha_factor) {
- src_factor[3] = emit_alpha_factor_calculation(f, sF[3],
- blend_color->color[3],
+ if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
+ src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
frag[3], pixel[3]);
/* If the alpha destination blend factor is the same as the alpha source
*/
dst_factor[3] = (dF[3] == sF[3])
? src_factor[3]
- : emit_alpha_factor_calculation(f, dF[3],
- blend_color->color[3],
+ : emit_alpha_factor_calculation(f, dF[3], const_color[3],
frag[3], pixel[3]);
}
-
+
if (sF[0] == sF[3]) {
src_factor[0] = src_factor[3];
src_factor[2] = dst_factor[3];
} else if (need_color_factor) {
emit_color_factor_calculation(f,
- b->rgb_src_factor,
- b->colormask,
- blend_color,
- frag, pixel, src_factor);
+ b->rt[0].rgb_src_factor,
+ b->rt[0].colormask,
+ frag, pixel, const_color, src_factor);
}
dst_factor[2] = src_factor[2];
} else if (need_color_factor) {
emit_color_factor_calculation(f,
- b->rgb_dst_factor,
- b->colormask,
- blend_color,
- frag, pixel, dst_factor);
+ b->rt[0].rgb_dst_factor,
+ b->rt[0].colormask,
+ frag, pixel, const_color, dst_factor);
}
-
- func[0] = b->rgb_func;
- func[1] = func[0];
- func[2] = func[0];
- func[3] = b->alpha_func;
for (i = 0; i < 4; ++i) {
- if ((b->colormask & (1U << i)) != 0) {
+ if ((b->rt[0].colormask & (1U << i)) != 0) {
emit_blend_calculation(f,
func[i], sF[i], dF[i],
frag[i], src_factor[i],
pixel[i], dst_factor[i]);
- spe_selb(f, frag[i], pixel[i], frag[i], mask);
- } else {
- spe_or(f, frag[i], pixel[i], pixel[i]);
}
}
spe_bi(f, 0, 0, 0);
+
+#if 0
+ {
+ const uint32_t *p = f->store;
+
+ printf("# %u instructions\n", f->csr - f->store);
+ printf("# blend (%sabled)\n",
+ (cb->base.blend_enable) ? "en" : "dis");
+ printf("# RGB func / sf / df: %u %u %u\n",
+ cb->base.rgb_func,
+ cb->base.rgb_src_factor,
+ cb->base.rgb_dst_factor);
+ printf("# ALP func / sf / df: %u %u %u\n",
+ cb->base.alpha_func,
+ cb->base.alpha_src_factor,
+ cb->base.alpha_dst_factor);
+
+ printf("\t.text\n");
+ for (/* empty */; p < f->csr; p++) {
+ printf("\t.long\t0x%04x\n", *p);
+ }
+ fflush(stdout);
+ }
+#endif
+}
+
+/**
+ * Compute the distance, in 4-byte units, from the address of
+ * f->store[f->num_inst] to the data address \c d.
+ *
+ * The low four bits of \c d are cleared before subtracting, so the distance
+ * is measured to the 16-byte-aligned address at or below \c d. The byte
+ * difference is then shifted right by two.
+ *
+ * The result is used as the offset operand of spe_lqr() when loading the
+ * shuffle patterns stored in the function's private data area.
+ *
+ * \param f Function being generated; supplies the base address
+ * \param d Address of the data to be referenced
+ * \return (aligned address of \c d - address of f->store[f->num_inst]) >> 2
+ *
+ * \note NOTE(review): the base depends on the current value of f->num_inst,
+ * so the result is presumably only valid for the very next instruction
+ * emitted -- confirm against the spe_lqr() emitter.
+ */
+static int
+PC_OFFSET(const struct spe_function *f, const void *d)
+{
+ const intptr_t pc = (intptr_t) &f->store[f->num_inst];
+ const intptr_t ea = ~0x0f & (intptr_t) d;
+
+ return (ea - pc) >> 2;
+}
+
+
+/**
+ * Generate code to perform color conversion and logic op
+ *
+ * \bug
+ * The code generated by this function should also perform dithering.
+ *
+ * \bug
+ * The code generated by this function should also perform color-write
+ * masking.
+ *
+ * \bug
+ * Only two framebuffer formats are supported at this time.
+ */
+void
+cell_generate_logic_op(struct spe_function *f,
+ const struct pipe_blend_state *blend,
+ struct pipe_surface *surf)
+{
+ const unsigned logic_op = (blend->logicop_enable)
+ ? blend->logicop_func : PIPE_LOGICOP_COPY;
+
+ /* This code generates a maximum of 37 instructions. An additional 32
+ * bytes (equiv. to 8 instructions) are needed for data storage. Round up
+ * to 64 to make it a happy power-of-two.
+ */
+ spe_init_func(f, SPE_INST_SIZE * 64);
+
+
+ /* Pixel colors in framebuffer format in AoS layout.
+ */
+ const int pixel[4] = {
+ spe_allocate_register(f, 3),
+ spe_allocate_register(f, 4),
+ spe_allocate_register(f, 5),
+ spe_allocate_register(f, 6),
+ };
+
+ /* Fragment colors stored as floats in SoA layout.
+ */
+ const int frag[4] = {
+ spe_allocate_register(f, 7),
+ spe_allocate_register(f, 8),
+ spe_allocate_register(f, 9),
+ spe_allocate_register(f, 10),
+ };
+
+ const int mask = spe_allocate_register(f, 11);
+
+
+ /* Short-circuit the noop and invert cases.
+ */
+ if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
+ spe_bi(f, 0, 0, 0);
+ return;
+ } else if (logic_op == PIPE_LOGICOP_INVERT) {
+ spe_nor(f, pixel[0], pixel[0], pixel[0]);
+ spe_nor(f, pixel[1], pixel[1], pixel[1]);
+ spe_nor(f, pixel[2], pixel[2], pixel[2]);
+ spe_nor(f, pixel[3], pixel[3], pixel[3]);
+ spe_bi(f, 0, 0, 0);
+ return;
+ }
+
+
+ const int tmp[4] = {
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ spe_allocate_available_register(f),
+ };
+
+ const int shuf_xpose_hi = spe_allocate_available_register(f);
+ const int shuf_xpose_lo = spe_allocate_available_register(f);
+ const int shuf_color = spe_allocate_available_register(f);
+
+
+ /* Pointer to the beginning of the function's private data area.
+ */
+ uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
+
+
+ /* Convert fragment colors to framebuffer format in AoS layout.
+ */
+ switch (surf->format) {
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ data[0] = 0x00010203;
+ data[1] = 0x10111213;
+ data[2] = 0x04050607;
+ data[3] = 0x14151617;
+ data[4] = 0x0c000408;
+ data[5] = 0x80808080;
+ data[6] = 0x80808080;
+ data[7] = 0x80808080;
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ data[0] = 0x03020100;
+ data[1] = 0x13121110;
+ data[2] = 0x07060504;
+ data[3] = 0x17161514;
+ data[4] = 0x0804000c;
+ data[5] = 0x80808080;
+ data[6] = 0x80808080;
+ data[7] = 0x80808080;
+ break;
+ default:
+ fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
+ ASSERT(0);
+ }
+
+ spe_ilh(f, tmp[0], 0x0808);
+ spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
+ spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
+ spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
+
+ spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
+ spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
+ spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
+ spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
+
+ spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
+ spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
+ spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
+ spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
+
+ spe_cfltu(f, frag[0], frag[0], 32);
+ spe_cfltu(f, frag[1], frag[1], 32);
+ spe_cfltu(f, frag[2], frag[2], 32);
+ spe_cfltu(f, frag[3], frag[3], 32);
+
+ spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
+ spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
+ spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
+ spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
+
+
+ /* If logic op is enabled, perform the requested logical operation on the
+ * converted fragment colors and the pixel colors.
+ */
+ switch (logic_op) {
+ case PIPE_LOGICOP_CLEAR:
+ spe_il(f, frag[0], 0);
+ spe_il(f, frag[1], 0);
+ spe_il(f, frag[2], 0);
+ spe_il(f, frag[3], 0);
+ break;
+ case PIPE_LOGICOP_NOR:
+ spe_nor(f, frag[0], frag[0], pixel[0]);
+ spe_nor(f, frag[1], frag[1], pixel[1]);
+ spe_nor(f, frag[2], frag[2], pixel[2]);
+ spe_nor(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ spe_andc(f, frag[0], pixel[0], frag[0]);
+ spe_andc(f, frag[1], pixel[1], frag[1]);
+ spe_andc(f, frag[2], pixel[2], frag[2]);
+ spe_andc(f, frag[3], pixel[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ spe_nor(f, frag[0], frag[0], frag[0]);
+ spe_nor(f, frag[1], frag[1], frag[1]);
+ spe_nor(f, frag[2], frag[2], frag[2]);
+ spe_nor(f, frag[3], frag[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ spe_andc(f, frag[0], frag[0], pixel[0]);
+ spe_andc(f, frag[1], frag[1], pixel[1]);
+ spe_andc(f, frag[2], frag[2], pixel[2]);
+ spe_andc(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_XOR:
+ spe_xor(f, frag[0], frag[0], pixel[0]);
+ spe_xor(f, frag[1], frag[1], pixel[1]);
+ spe_xor(f, frag[2], frag[2], pixel[2]);
+ spe_xor(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_NAND:
+ spe_nand(f, frag[0], frag[0], pixel[0]);
+ spe_nand(f, frag[1], frag[1], pixel[1]);
+ spe_nand(f, frag[2], frag[2], pixel[2]);
+ spe_nand(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_AND:
+ spe_and(f, frag[0], frag[0], pixel[0]);
+ spe_and(f, frag[1], frag[1], pixel[1]);
+ spe_and(f, frag[2], frag[2], pixel[2]);
+ spe_and(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ spe_eqv(f, frag[0], frag[0], pixel[0]);
+ spe_eqv(f, frag[1], frag[1], pixel[1]);
+ spe_eqv(f, frag[2], frag[2], pixel[2]);
+ spe_eqv(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ spe_orc(f, frag[0], pixel[0], frag[0]);
+ spe_orc(f, frag[1], pixel[1], frag[1]);
+ spe_orc(f, frag[2], pixel[2], frag[2]);
+ spe_orc(f, frag[3], pixel[3], frag[3]);
+ break;
+ case PIPE_LOGICOP_COPY:
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ spe_orc(f, frag[0], frag[0], pixel[0]);
+ spe_orc(f, frag[1], frag[1], pixel[1]);
+ spe_orc(f, frag[2], frag[2], pixel[2]);
+ spe_orc(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_OR:
+ spe_or(f, frag[0], frag[0], pixel[0]);
+ spe_or(f, frag[1], frag[1], pixel[1]);
+ spe_or(f, frag[2], frag[2], pixel[2]);
+ spe_or(f, frag[3], frag[3], pixel[3]);
+ break;
+ case PIPE_LOGICOP_SET:
+ spe_il(f, frag[0], ~0);
+ spe_il(f, frag[1], ~0);
+ spe_il(f, frag[2], ~0);
+ spe_il(f, frag[3], ~0);
+ break;
+
+ /* These two cases are short-circuited above.
+ */
+ case PIPE_LOGICOP_INVERT:
+ case PIPE_LOGICOP_NOOP:
+ default:
+ assert(0);
+ }
+
+
+ /* Apply fragment mask.
+ */
+ spe_ilh(f, tmp[0], 0x0000);
+ spe_ilh(f, tmp[1], 0x0404);
+ spe_ilh(f, tmp[2], 0x0808);
+ spe_ilh(f, tmp[3], 0x0c0c);
+
+ spe_shufb(f, tmp[0], mask, mask, tmp[0]);
+ spe_shufb(f, tmp[1], mask, mask, tmp[1]);
+ spe_shufb(f, tmp[2], mask, mask, tmp[2]);
+ spe_shufb(f, tmp[3], mask, mask, tmp[3]);
+
+ spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
+ spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
+ spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
+ spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
+
+ spe_bi(f, 0, 0, 0);
+
+#if 0
+ {
+ const uint32_t *p = f->store;
+ unsigned i;
+
+ printf("# %u instructions\n", f->csr - f->store);
+
+ printf("\t.text\n");
+ for (i = 0; i < 64; i++) {
+ printf("\t.long\t0x%04x\n", p[i]);
+ }
+ fflush(stdout);
+ }
+#endif
}