}
for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
- if (dirty & (1 << i))
- OUT_BATCH(i915->current.immediate[i]);
+ if (dirty & (1 << i)) {
+ /* Fixup blend function for A8 dst buffers.
+ * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
+ * and therefore we need to use the color factor for alphas. */
+ if ((i == I915_IMMEDIATE_S6) &&
+ (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM)) {
+ uint32_t imm = i915->current.immediate[i];
+ uint32_t srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK;
+ if (srcRGB == BLENDFACT_DST_ALPHA)
+ srcRGB = BLENDFACT_DST_COLR;
+ else if (srcRGB == BLENDFACT_INV_DST_ALPHA)
+ srcRGB = BLENDFACT_INV_DST_COLR;
+ imm &= ~SRC_BLND_FACT(BLENDFACT_MASK);
+ imm |= SRC_BLND_FACT(srcRGB);
+ OUT_BATCH(imm);
+ } else {
+ OUT_BATCH(i915->current.immediate[i]);
+ }
+ }
}
}
}
}
-static const struct
-{
- enum pipe_format format;
- uint hw_shift_R;
- uint hw_shift_G;
- uint hw_shift_B;
- uint hw_shift_A;
-} fixup_formats[] = {
- { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */},
- { PIPE_FORMAT_L8_UNORM, 28, 28, 28, 16 /* RRRA */},
- { PIPE_FORMAT_I8_UNORM, 28, 28, 28, 16 /* RRRA */},
- { PIPE_FORMAT_A8_UNORM, 16, 16, 16, 16 /* AAAA */},
- { PIPE_FORMAT_NONE, 0, 0, 0, 0},
-};
-
-static boolean need_fixup(struct pipe_surface* p)
-{
- enum pipe_format f;
-
- /* if we don't have a surface bound yet, we don't need to fixup the shader */
- if (!p)
- return FALSE;
-
- f = p->format;
- for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
- if (fixup_formats[i].format == f)
- return TRUE;
-
- return FALSE;
-}
-
-static uint fixup_swizzle(enum pipe_format f, uint v)
-{
- int i;
-
- for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
- if (fixup_formats[i].format == f)
- break;
-
- if (fixup_formats[i].format == PIPE_FORMAT_NONE)
- return v;
-
- uint rgba = v & 0xFFFF0000;
-
- v &= 0xFFFF;
- v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28;
- v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24;
- v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20;
- v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16;
-
- return v;
-}
-
static void
validate_program(struct i915_context *i915, unsigned *batch_space)
{
- *batch_space = i915->fs->program_len;
+ uint additional_size = i915->current.target_fixup_format ? 1 : 0;
+
+ /* Report how many batch words emit_program will write: decl_len for the
+ * declarations, program_len for the instructions, plus 3 more for the
+ * trailing swizzle mov when a target fixup format is set (this is the
+ * extra batch space needed to emulate rgba framebuffers). */
+ *batch_space = i915->fs->decl_len + i915->fs->program_len + 3 * additional_size;
}
static void
emit_program(struct i915_context *i915)
{
- struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
- boolean need_format_fixup = need_fixup(cbuf_surface);
- int i;
- int fixup_offset = -1;
+ uint need_target_fixup = i915->current.target_fixup_format ? 1 : 0;
+ uint i;
/* we should always have, at least, a pass-through program */
assert(i915->fs->program_len > 0);
- if (need_format_fixup) {
- /* Find where we emit the output color */
- for (i = i915->fs->program_len - 3; i>0; i-=3) {
- uint instr = i915->fs->program[i];
- if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) ==
- (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) {
- /* Found it! */
- fixup_offset = i + 1;
- break;
- }
- }
- if (fixup_offset == -1) {
- need_format_fixup = FALSE;
- debug_printf("couldn't find fixup offset\n");
- }
+ /* The shader is written to the batch in up to three parts: the
+ * declarations, the program instructions, and (only when a target fixup
+ * format is set) one extra 3-word mov appended at the end.
+ *
+ * First part: output the declarations */
+ {
+ /* decl[0] is the word that carries the size; since the optional fixup
+ * mov below adds 3 words after the program, grow the size by 3 when
+ * that mov is going to be emitted */
+ uint size = (i915->fs->decl[0]);
+ size += need_target_fixup * 3;
+ OUT_BATCH(size);
}
- /* emit the program to the hw */
- for (i = 0; i < i915->fs->program_len; i++) {
- if (need_format_fixup && (i == fixup_offset) ) {
- uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]);
- OUT_BATCH(v);
- } else
- OUT_BATCH(i915->fs->program[i]);
+ for (i = 1 ; i < i915->fs->decl_len; i++)
+ OUT_BATCH(i915->fs->decl[i]);
+
+ /* second part: output the program words unmodified */
+ for (i = 0 ; i < i915->fs->program_len; i++)
+ OUT_BATCH(i915->fs->program[i]);
+
+ /* third part: an additional mov with swizzle to fake RGBA framebuffers */
+ if (need_target_fixup) {
+ /* mov out_color, out_color.<swizzle>; the swizzle word is whatever is
+ * stored in i915->current.fixup_swizzle (e.g. .zyxw).
+ * NOTE(review): the swizzle presumably depends on the target format;
+ * confirm where fixup_swizzle is set. */
+ OUT_BATCH(A0_MOV |
+ (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
+ A0_DEST_CHANNEL_ALL |
+ (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
+ (T_DIFFUSE << A0_SRC0_NR_SHIFT));
+ OUT_BATCH(i915->current.fixup_swizzle);
+ OUT_BATCH(0);
}
}
else
*batch_space = 0;
+#if 0
+static int counter_total = 0;
+#define VALIDATE_ATOM(atom, hw_dirty) \
+ if (i915->hardware_dirty & hw_dirty) { \
+ static int counter_##atom = 0;\
+ validate_##atom(i915, &tmp); \
+ *batch_space += tmp;\
+ counter_##atom += tmp;\
+ counter_total += tmp;\
+ printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);}
+#else
#define VALIDATE_ATOM(atom, hw_dirty) \
if (i915->hardware_dirty & hw_dirty) { \
validate_##atom(i915, &tmp); \
*batch_space += tmp; }
+#endif
VALIDATE_ATOM(flush, I915_HW_FLUSH);
VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);