radeonsi: kill point size VS output if it's not used by the rasterizer
[mesa.git] / src / gallium / drivers / i915 / i915_state_emit.c
index 39fb13aec7ec936e4b6e1e5fb76bb66783365e22..9f0f9e33ca3fc84ea8645369e1ad2e2ac4b6aaab 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2003 VMware, Inc.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "i915_context.h"
 #include "i915_batch.h"
 #include "i915_debug.h"
+#include "i915_fpc.h"
 #include "i915_resource.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_format.h"
 
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -109,7 +110,7 @@ static void
 emit_invariant(struct i915_context *i915)
 {
    i915_winsys_batchbuffer_write(i915->batch, invariant_state,
-                                 Elements(invariant_state)*sizeof(uint32_t));
+                                 ARRAY_SIZE(invariant_state)*sizeof(uint32_t));
 }
 
 static void
@@ -127,6 +128,70 @@ validate_immediate(struct i915_context *i915, unsigned *batch_space)
    *batch_space = 1 + util_bitcount(dirty);
 }
 
+/**
+ * Look up the S5 write-disable bit(s) to use for one colour component of
+ * a render target.
+ *
+ * p selects the table row by its format; when p is NULL, or its format is
+ * not listed, the last (default) row is used.  component indexes hw_mask[]
+ * and the caller in this file passes 0, 1, 2, 3 for red, green, blue and
+ * alpha respectively.
+ */
+static uint target_fixup(struct pipe_surface *p, int component)
+{
+   /* One row per format that needs remapped write masks.  The row whose
+    * format is 0 ends the search and holds the unmodified masks. */
+   const struct
+   {
+      enum pipe_format format;
+      uint hw_mask[4];
+   } fixup_mask[] = {
+      { PIPE_FORMAT_R8G8B8A8_UNORM, { S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_RED, S5_WRITEDISABLE_ALPHA}},
+      { PIPE_FORMAT_R8G8B8X8_UNORM, { S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_RED, S5_WRITEDISABLE_ALPHA}},
+      { PIPE_FORMAT_L8_UNORM,       { S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE, 0, 0, S5_WRITEDISABLE_ALPHA}},
+      { PIPE_FORMAT_I8_UNORM,       { S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE, 0, 0, S5_WRITEDISABLE_ALPHA}},
+      { PIPE_FORMAT_A8_UNORM,       { 0, 0, 0, S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA}},
+      { 0,                          { S5_WRITEDISABLE_RED, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_ALPHA}}
+   };
+   /* Start on the last row so a NULL surface gets the default masks
+    * without running the search below. */
+   int i = sizeof(fixup_mask) / sizeof(*fixup_mask) - 1;
+
+   if (p)
+      for(i = 0; fixup_mask[i].format != 0; i++)
+         if (p->format == fixup_mask[i].format)
+            return fixup_mask[i].hw_mask[component];
+
+   /* Just return default masks: i indexes the terminating row here, either
+    * from its initial value or because the search ran off the listed formats */
+   return fixup_mask[i].hw_mask[component];
+}
+
+/**
+ * Emit the S5 immediate with its colour write-disable bits translated for
+ * the format of the first colour buffer.
+ *
+ * The four S5_WRITEDISABLE_* bits are cleared from imm, then for each one
+ * that was set the replacement bit(s) returned by target_fixup() are OR-ed
+ * back in.  All other bits of imm are passed through unchanged.  The result
+ * is written with OUT_BATCH().
+ */
+static void emit_immediate_s5(struct i915_context *i915, uint imm)
+{
+   /* Fixup write mask for non-BGRA render targets */
+   uint fixup_imm = imm & ~( S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN |
+                             S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA );
+   /* May be NULL; target_fixup() then falls back to its default masks */
+   struct pipe_surface *surf = i915->framebuffer.cbufs[0];
+
+   if (imm & S5_WRITEDISABLE_RED)
+      fixup_imm |= target_fixup(surf, 0);
+   if (imm & S5_WRITEDISABLE_GREEN)
+      fixup_imm |= target_fixup(surf, 1);
+   if (imm & S5_WRITEDISABLE_BLUE)
+      fixup_imm |= target_fixup(surf, 2);
+   if (imm & S5_WRITEDISABLE_ALPHA)
+      fixup_imm |= target_fixup(surf, 3);
+
+   OUT_BATCH(fixup_imm);
+}
+
+/**
+ * Emit the S6 immediate, adjusting the source blend factor when
+ * current.target_fixup_format is PIPE_FORMAT_A8_UNORM.
+ *
+ * In that case a source factor of DST_ALPHA / INV_DST_ALPHA is replaced by
+ * DST_COLR / INV_DST_COLR; any other source factor is written back as is.
+ * Only the source factor is examined here.  For every other fixup format
+ * imm is emitted unmodified.
+ */
+static void emit_immediate_s6(struct i915_context *i915, uint imm)
+{
+   /* Fixup blend function for A8 dst buffers.
+    * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer,
+    * and therefore we need to use the color factor for alphas. */
+   uint srcRGB;
+
+   if (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM) {
+      srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK;
+      if (srcRGB == BLENDFACT_DST_ALPHA)
+         srcRGB = BLENDFACT_DST_COLR;
+      else if (srcRGB == BLENDFACT_INV_DST_ALPHA)
+         srcRGB = BLENDFACT_INV_DST_COLR;
+      /* NOTE(review): presumably SRC_BLND_FACT() places its argument at
+       * S6_CBUF_SRC_BLEND_FACT_SHIFT, so this clears the field and then
+       * stores the (possibly replaced) factor -- confirm in the register
+       * header. */
+      imm &= ~SRC_BLND_FACT(BLENDFACT_MASK);
+      imm |= SRC_BLND_FACT(srcRGB);
+   }
+
+   OUT_BATCH(imm);
+}
+
 static void
 emit_immediate(struct i915_context *i915)
 {
@@ -151,8 +216,14 @@ emit_immediate(struct i915_context *i915)
    }
 
    for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
-      if (dirty & (1 << i))
-         OUT_BATCH(i915->current.immediate[i]);
+      if (dirty & (1 << i)) {
+         if (i == I915_IMMEDIATE_S5)
+            emit_immediate_s5(i915, i915->current.immediate[i]);
+         else if (i == I915_IMMEDIATE_S6)
+            emit_immediate_s6(i915, i915->current.immediate[i]);
+         else
+            OUT_BATCH(i915->current.immediate[i]);
+      }
    }
 }
 
@@ -255,11 +326,13 @@ emit_map(struct i915_context *i915)
          if (enabled & (1 << unit)) {
             struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture);
             struct i915_winsys_buffer *buf = texture->buffer;
+            unsigned offset = i915->current.texbuffer[unit][2];
+
             assert(buf);
 
             count++;
 
-            OUT_RELOC(buf, I915_USAGE_SAMPLER, 0);
+            OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
             OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
             OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
          }
@@ -299,8 +372,10 @@ emit_sampler(struct i915_context *i915)
+/**
+ * Report through *batch_space the batch space needed for the fragment
+ * shader constants: 0 when the shader has none, otherwise
+ * 2 + 4 * num_constants.
+ * NOTE(review): presumably counted in dwords (2 of packet header plus 4
+ * per constant) -- confirm against emit_constants().
+ */
 static void
 validate_constants(struct i915_context *i915, unsigned *batch_space)
 {
-   *batch_space = i915->fs->num_constants ?
+   int nr = i915->fs->num_constants ?
       2 + 4*i915->fs->num_constants : 0;
+
+   *batch_space = nr;
 }
 
 static void
@@ -310,6 +385,8 @@ emit_constants(struct i915_context *i915)
     * immediates according to the constant_flags[] array.
     */
    const uint nr = i915->fs->num_constants;
+
+   assert(nr < I915_MAX_CONSTANT);
    if (nr) {
       uint i;
 
@@ -343,100 +420,59 @@ emit_constants(struct i915_context *i915)
    }
 }
 
-static const struct
-{
-   enum pipe_format format;
-   uint hw_shift_R;
-   uint hw_shift_G;
-   uint hw_shift_B;
-   uint hw_shift_A;
-} fixup_formats[] = {
-   { PIPE_FORMAT_R8G8B8A8_UNORM, 20, 24, 28, 16 /* BGRA */},
-   { PIPE_FORMAT_L8_UNORM,       28, 28, 28, 16 /* RRRA */},
-   { PIPE_FORMAT_I8_UNORM,       28, 28, 28, 16 /* RRRA */},
-   { PIPE_FORMAT_A8_UNORM,       16, 16, 16, 16 /* AAAA */},
-   { PIPE_FORMAT_NONE,           0,   0,  0,  0},
-};
-
-static boolean need_fixup(struct pipe_surface* p)
-{
-   enum pipe_format f;
-
-   /* if we don't have a surface bound yet, we don't need to fixup the shader */
-   if (!p)
-      return FALSE;
-
-   f = p->format;
-   for(int i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
-      if (fixup_formats[i].format == f)
-         return TRUE;
-
-   return FALSE;
-}
-
-static uint fixup_swizzle(enum pipe_format f, uint v)
-{
-   int i;
-
-   for(i=0; fixup_formats[i].format != PIPE_FORMAT_NONE; i++)
-      if (fixup_formats[i].format == f)
-           break;
-
-   if (fixup_formats[i].format == PIPE_FORMAT_NONE)
-          return v;
-
-   uint rgba = v & 0xFFFF0000;
-
-   v &= 0xFFFF;
-   v |= ((rgba >> fixup_formats[i].hw_shift_R) & 0xF) << 28;
-   v |= ((rgba >> fixup_formats[i].hw_shift_G) & 0xF) << 24;
-   v |= ((rgba >> fixup_formats[i].hw_shift_B) & 0xF) << 20;
-   v |= ((rgba >> fixup_formats[i].hw_shift_A) & 0xF) << 16;
-
-   return v;
-}
-
+/**
+ * Report through *batch_space the batch space needed to emit the fragment
+ * shader: decl_len + program_len, plus 3 when current.target_fixup_format
+ * is set.  emit_program() calls this to recover that extra amount.
+ */
 static void
 validate_program(struct i915_context *i915, unsigned *batch_space)
 {
-   *batch_space = i915->fs->program_len;
+   uint additional_size = 0;
+
+   /* 3 matches the three words emit_program() writes for its extra mov */
+   additional_size += i915->current.target_fixup_format ? 3 : 0;
+
+   /* we need more batch space if we want to emulate rgba framebuffers */
+   *batch_space = i915->fs->decl_len + i915->fs->program_len + additional_size;
 }
 
+/**
+ * Emit the fragment shader into the batch: first the declarations, then
+ * the program three words at a time, then -- when
+ * current.target_fixup_format is set -- one extra three-word mov whose
+ * second word is current.fixup_swizzle.
+ *
+ * The first declaration word is increased by the size of that extra mov.
+ * The size is obtained by calling validate_program() and subtracting
+ * decl_len + program_len from its result.
+ */
 static void
 emit_program(struct i915_context *i915)
 {
-   struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0];
-   boolean need_format_fixup = need_fixup(cbuf_surface);
-   int i;
-   int fixup_offset = -1;
+   uint additional_size = 0;
+   uint i;
+
+   /* count how much additional space we'll need */
+   validate_program(i915, &additional_size);
+   additional_size -= i915->fs->decl_len + i915->fs->program_len;
 
    /* we should always have, at least, a pass-through program */
    assert(i915->fs->program_len > 0);
 
-   if (need_format_fixup) {
-      /* Find where we emit the output color */
-      for (i = i915->fs->program_len - 3; i>0; i-=3) {
-         uint instr = i915->fs->program[i];
-         if ((instr & (REG_NR_MASK << A0_DEST_TYPE_SHIFT)) == 
-             (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) ) {
-            /* Found it! */
-            fixup_offset = i + 1;
-            break;
-        }
-      }
-      if (fixup_offset == -1) {
-         need_format_fixup = FALSE;
-         debug_printf("couldn't find fixup offset\n");
-      }
+   /* output the declarations */
+   {
+      /* first word has the size, we have to adjust that */
+      uint size = (i915->fs->decl[0]);
+      size += additional_size;
+      OUT_BATCH(size);
    }
 
-   /* emit the program to the hw */
-   for (i = 0; i < i915->fs->program_len; i++) {
-      if (need_format_fixup && (i == fixup_offset) ) {
-         uint v = fixup_swizzle(cbuf_surface->format, i915->fs->program[i]);
-         OUT_BATCH(v);
-      } else
-         OUT_BATCH(i915->fs->program[i]);
+   for (i = 1 ; i < i915->fs->decl_len; i++)
+      OUT_BATCH(i915->fs->decl[i]);
+
+   /* output the program */
+   assert(i915->fs->program_len % 3 == 0);
+   for (i = 0 ; i < i915->fs->program_len; i+=3) {
+      OUT_BATCH(i915->fs->program[i]);
+      OUT_BATCH(i915->fs->program[i+1]);
+      OUT_BATCH(i915->fs->program[i+2]);
+   }
+
+   /* we emit an additional mov with swizzle to fake RGBA framebuffers */
+   if (i915->current.target_fixup_format) {
+      /* mov out_color, out_color.zyxw */
+      OUT_BATCH(A0_MOV |
+                (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
+                A0_DEST_CHANNEL_ALL |
+                (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
+                (T_DIFFUSE << A0_SRC0_NR_SHIFT));
+      OUT_BATCH(i915->current.fixup_swizzle);
+      OUT_BATCH(0);
    }
 }
 
@@ -459,14 +495,26 @@ i915_validate_state(struct i915_context *i915, unsigned *batch_space)
 
    i915->num_validation_buffers = 0;
    if (i915->hardware_dirty & I915_HW_INVARIANT)
-      *batch_space = Elements(invariant_state);
+      *batch_space = ARRAY_SIZE(invariant_state);
    else
       *batch_space = 0;
 
+#if 0
+static int counter_total = 0;
+#define VALIDATE_ATOM(atom, hw_dirty) \
+   if (i915->hardware_dirty & hw_dirty) { \
+      static int counter_##atom = 0;\
+      validate_##atom(i915, &tmp); \
+      *batch_space += tmp;\
+      counter_##atom += tmp;\
+      counter_total += tmp;\
+      printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);}
+#else
 #define VALIDATE_ATOM(atom, hw_dirty) \
    if (i915->hardware_dirty & hw_dirty) { \
       validate_##atom(i915, &tmp); \
       *batch_space += tmp; }
+#endif
    VALIDATE_ATOM(flush, I915_HW_FLUSH);
    VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
    VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
@@ -501,12 +549,12 @@ i915_emit_hardware_state(struct i915_context *i915 )
       i915_dump_hardware_dirty(i915, __FUNCTION__);
 
    if (!i915_validate_state(i915, &batch_space)) {
-      FLUSH_BATCH(NULL);
+      FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
       assert(i915_validate_state(i915, &batch_space));
    }
 
    if(!BEGIN_BATCH(batch_space)) {
-      FLUSH_BATCH(NULL);
+      FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
       assert(i915_validate_state(i915, &batch_space));
       assert(BEGIN_BATCH(batch_space));
    }