Merge remote branch 'origin/master' into pipe-video

[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c

index d3373ea79e80fb9b333adc3ad8da01b8f992ef39..5b5afc4626b5b02c7f665bfdd6d761f27ef9e522 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -47,20 +47,45 @@ struct brw_wm_unit_key {
     unsigned int dispatch_grf_start_reg;
  
     unsigned int curbe_offset;
-   unsigned int urb_size;
  
     unsigned int nr_surfaces, sampler_count;
     GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
     GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLboolean color_write_enable;
     GLfloat offset_units, offset_factor;
  };
  
+bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{  /* Returns true as soon as one color draw buffer slot passes all checks below, else false. */
+   struct gl_context *ctx = &brw->intel.ctx;
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   int i;
+
+   /* _NEW_BUFFERS: visit each of the draw framebuffer's _NumColorDrawBuffers slots. */
+   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+      /* _NEW_COLOR: slot i qualifies only when all three conditions below hold. */
+      if (rb &&   /* 1: a renderbuffer is present in slot i */
+         (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+          fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&   /* 2: fp writes COLOR or DATA0 + i */
+         (ctx->Color.ColorMask[i][0] ||
+          ctx->Color.ColorMask[i][1] ||
+          ctx->Color.ColorMask[i][2] ||
+          ctx->Color.ColorMask[i][3])) {   /* 3: at least one of the four ColorMask[i] entries is nonzero */
+        return true;
+      }
+   }
+
+   return false;   /* no slot satisfied all three conditions */
+}
+
  static void
  wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
  {
-   GLcontext *ctx = &brw->intel.ctx;
+   struct gl_context *ctx = &brw->intel.ctx;
     const struct gl_fragment_program *fp = brw->fragment_program;
-   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
     struct intel_context *intel = &brw->intel;
  
     memset(key, 0, sizeof(*key));
@@ -70,10 +95,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
     key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
     key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
     key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
-   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
-
-   /* BRW_NEW_URB_FENCE */
-   key->urb_size = brw->urb.vsize;
+   key->total_scratch = brw->wm.prog_data->total_scratch;
  
     /* BRW_NEW_CURBE_OFFSETS */
     key->curbe_offset = brw->curbe.wm_start;
@@ -100,12 +122,23 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
     if (brw->state.depth_region == NULL)
        key->computes_depth = 0;
  
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   key->color_write_enable = brw_color_buffer_write_enabled(brw);
+
     /* _NEW_COLOR */
     key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
-   key->is_glsl = bfp->isGLSL;
  
-   /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   /* If using the fragment shader backend, the program is always
+    * 8-wide.
+    */
+   if (ctx->Shader.CurrentFragmentProgram) {
+      struct brw_shader *shader = (struct brw_shader *)
+        ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+      if (shader != NULL && shader->ir != NULL) {
+        key->is_glsl = GL_TRUE;
+      }
+   }
  
     /* _NEW_DEPTH */
     key->stats_wm = intel->stats_wm;
@@ -122,13 +155,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
  /**
   * Setup wm hardware state.  See page 225 of Volume 2
   */
-static dri_bo *
+static drm_intel_bo *
  wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
-                       dri_bo **reloc_bufs)
+                       drm_intel_bo **reloc_bufs)
  {
     struct intel_context *intel = &brw->intel;
     struct brw_wm_unit_state wm;
-   dri_bo *bo;
+   drm_intel_bo *bo;
  
     memset(&wm, 0, sizeof(wm));
  
@@ -137,7 +170,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
     wm.thread1.depth_coef_urb_read_offset = 1;
     wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
  
-   if (intel->is_ironlake)
+   if (intel->gen == 5)
        wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
     else
        wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -145,7 +178,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
     if (key->total_scratch != 0) {
        wm.thread2.scratch_space_base_pointer =
          brw->wm.scratch_bo->offset >> 10; /* reloc */
-      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+      wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11;
     } else {
        wm.thread2.scratch_space_base_pointer = 0;
        wm.thread2.per_thread_scratch_space = 0;
@@ -157,7 +190,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
     wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
     wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
  
-   if (intel->is_ironlake)
+   if (intel->gen == 5)
        wm.wm4.sampler_count = 0; /* hardware requirement */
     else
        wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -179,7 +212,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
        wm.wm5.enable_16_pix = 1;
  
     wm.wm5.max_threads = brw->wm_max_threads - 1;
-   wm.wm5.thread_dispatch_enable = 1;  /* AKA: color_write */
+
+   if (key->color_write_enable ||
+       key->uses_kill ||
+       key->computes_depth) {
+      wm.wm5.thread_dispatch_enable = 1;
+   }
+
     wm.wm5.legacy_line_rast = 0;
     wm.wm5.legacy_global_depth_bias = 0;
     wm.wm5.early_depth_test = 1;                /* never need to disable */
@@ -204,38 +243,33 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
  
     wm.wm5.line_stipple = key->line_stipple;
  
-   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+   if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm)
        wm.wm4.stats_enable = 1;
  
     bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
                          key, sizeof(*key),
                          reloc_bufs, 3,
-                        &wm, sizeof(wm),
-                        NULL, NULL);
+                        &wm, sizeof(wm));
  
     /* Emit WM program relocation */
-   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION, 0,
-                    wm.thread0.grf_reg_count << 1,
-                    offsetof(struct brw_wm_unit_state, thread0),
-                    brw->wm.prog_bo);
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0),
+                          brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0);
  
     /* Emit scratch space relocation */
     if (key->total_scratch != 0) {
-      dri_bo_emit_reloc(bo,
-                       0, 0,
-                       wm.thread2.per_thread_scratch_space,
-                       offsetof(struct brw_wm_unit_state, thread2),
-                       brw->wm.scratch_bo);
+      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
+                             brw->wm.scratch_bo,
+                             wm.thread2.per_thread_scratch_space,
+                             I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
     }
  
     /* Emit sampler state relocation */
     if (key->sampler_count != 0) {
-      dri_bo_emit_reloc(bo,
-                       I915_GEM_DOMAIN_INSTRUCTION, 0,
-                       wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-                       offsetof(struct brw_wm_unit_state, wm4),
-                       brw->wm.sampler_bo);
+      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4),
+                             brw->wm.sampler_bo, (wm.wm4.stats_enable |
+                                                  (wm.wm4.sampler_count << 2)),
+                             I915_GEM_DOMAIN_INSTRUCTION, 0);
     }
  
     return bo;
@@ -246,26 +280,25 @@ static void upload_wm_unit( struct brw_context *brw )
  {
     struct intel_context *intel = &brw->intel;
     struct brw_wm_unit_key key;
-   dri_bo *reloc_bufs[3];
+   drm_intel_bo *reloc_bufs[3];
     wm_unit_populate_key(brw, &key);
  
     /* Allocate the necessary scratch space if we haven't already.  Don't
      * bother reducing the allocation later, since we use scratch so
      * rarely.
      */
-   assert(key.total_scratch <= 12 * 1024);
     if (key.total_scratch) {
        GLuint total = key.total_scratch * brw->wm_max_threads;
  
        if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-        dri_bo_unreference(brw->wm.scratch_bo);
+        drm_intel_bo_unreference(brw->wm.scratch_bo);
          brw->wm.scratch_bo = NULL;
        }
        if (brw->wm.scratch_bo == NULL) {
-        brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
-                                           "wm scratch",
-                                           total,
-                                           4096);
+        brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+                                                "wm scratch",
+                                                total,
+                                                4096);
        }
     }
  
@@ -273,7 +306,7 @@ static void upload_wm_unit( struct brw_context *brw )
     reloc_bufs[1] = brw->wm.scratch_bo;
     reloc_bufs[2] = brw->wm.sampler_bo;
  
-   dri_bo_unreference(brw->wm.state_bo);
+   drm_intel_bo_unreference(brw->wm.state_bo);
     brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
                                        &key, sizeof(key),
                                        reloc_bufs, 3,
@@ -289,7 +322,8 @@ const struct brw_tracked_state brw_wm_unit = {
                _NEW_POLYGONSTIPPLE | 
                _NEW_LINE | 
                _NEW_COLOR |
-              _NEW_DEPTH),
+              _NEW_DEPTH |
+              _NEW_BUFFERS),
  
        .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
               BRW_NEW_CURBE_OFFSETS |