i965/fs: Don't consider the stencil output to be a color output.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_draw.c
index 9d034cfdb33b8c82d67c0a500c7a01dc74c98e5c..9b1e18c51cff9223fa633c91448bfffbeb69e112 100644 (file)
@@ -38,6 +38,7 @@
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
 #include "drivers/common/meta.h"
+#include "util/bitscan.h"
 
 #include "brw_blorp.h"
 #include "brw_draw.h"
@@ -301,16 +302,15 @@ brw_merge_inputs(struct brw_context *brw,
    }
 
    if (brw->gen < 8 && !brw->is_haswell) {
-      struct gl_program *vp = &ctx->VertexProgram._Current->Base;
+      GLbitfield64 mask = ctx->VertexProgram._Current->Base.InputsRead;
       /* Prior to Haswell, the hardware can't natively support GL_FIXED or
        * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
        */
-      for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-         if (!(vp->InputsRead & BITFIELD64_BIT(i)))
-            continue;
-
+      while (mask) {
          uint8_t wa_flags = 0;
 
+         i = u_bit_scan64(&mask);
+
          switch (brw->vb.inputs[i].glarray->Type) {
 
          case GL_FIXED:
@@ -386,8 +386,32 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       struct intel_renderbuffer *irb =
          intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
-      if (irb)
+      if (irb) {
          brw_render_cache_set_add_bo(brw, irb->mt->bo);
+
+         if (intel_miptree_is_lossless_compressed(brw, irb->mt)) {
+            irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED;
+         }
+      }
+   }
+}
+
+static void
+brw_predraw_set_aux_buffers(struct brw_context *brw)
+{
+   if (brw->gen < 9)
+      return;
+
+   struct gl_context *ctx = &brw->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+      struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+      if (irb) {
+         intel_miptree_prepare_mcs(brw, irb->mt);
+      }
    }
 }
 
@@ -400,6 +424,7 @@ brw_try_draw_prims(struct gl_context *ctx,
                    const struct _mesa_prim *prims,
                    GLuint nr_prims,
                    const struct _mesa_index_buffer *ib,
+                   bool index_bounds_valid,
                    GLuint min_index,
                    GLuint max_index,
                    struct brw_transform_feedback_object *xfb_obj,
@@ -427,17 +452,18 @@ brw_try_draw_prims(struct gl_context *ctx,
     * index.
     */
    brw->wm.base.sampler_count =
-      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
+      util_last_bit(ctx->FragmentProgram._Current->Base.SamplersUsed);
    brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
-      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
+      util_last_bit(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
    brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
-      _mesa_fls(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0;
+      util_last_bit(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0;
    brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
-      _mesa_fls(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0;
+      util_last_bit(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0;
    brw->vs.base.sampler_count =
-      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);
+      util_last_bit(ctx->VertexProgram._Current->Base.SamplersUsed);
 
    intel_prepare_render(brw);
+   brw_predraw_set_aux_buffers(brw);
 
    /* This workaround has to happen outside of brw_upload_render_state()
     * because it may flush the batchbuffer for a blit, affecting the state
@@ -452,6 +478,7 @@ brw_try_draw_prims(struct gl_context *ctx,
    brw->ib.ib = ib;
    brw->ctx.NewDriverState |= BRW_NEW_INDICES;
 
+   brw->vb.index_bounds_valid = index_bounds_valid;
    brw->vb.min_index = min_index;
    brw->vb.max_index = max_index;
    brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
@@ -475,9 +502,11 @@ brw_try_draw_prims(struct gl_context *ctx,
       intel_batchbuffer_save_state(brw);
 
       if (brw->num_instances != prims[i].num_instances ||
-          brw->basevertex != prims[i].basevertex) {
+          brw->basevertex != prims[i].basevertex ||
+          brw->baseinstance != prims[i].base_instance) {
          brw->num_instances = prims[i].num_instances;
          brw->basevertex = prims[i].basevertex;
+         brw->baseinstance = prims[i].base_instance;
          if (i > 0) { /* For i == 0 we just did this before the loop */
             brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
             brw_merge_inputs(brw, arrays);
@@ -634,14 +663,15 @@ brw_draw_prims(struct gl_context *ctx,
       perf_debug("Scanning index buffer to compute index buffer bounds.  "
                  "Use glDrawRangeElements() to avoid this.\n");
       vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
+      index_bounds_valid = true;
    }
 
    /* Try drawing with the hardware, but don't do anything else if we can't
     * manage it.  swrast doesn't support our featureset, so we can't fall back
     * to it.
     */
-   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
-                      xfb_obj, stream, indirect);
+   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, index_bounds_valid,
+                      min_index, max_index, xfb_obj, stream, indirect);
 }
 
 void