Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / drivers / i965 / brw_wm_state.c
index 361f91292bec0a51221819a323907c34f8870a71..ee970ac75bcd0a53a019e6aa2e7b8e5a880d8281 100644 (file)
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
                    
-
+#include "util/u_math.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
 /***********************************************************************
  * WM unit - fragment programs and rasterization
@@ -52,7 +54,7 @@ struct brw_wm_unit_key {
    unsigned int max_threads;
 
    unsigned int nr_surfaces, sampler_count;
-   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+   GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
    GLfloat offset_units, offset_factor;
 };
@@ -60,14 +62,11 @@ struct brw_wm_unit_key {
 static void
 wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   const struct gl_fragment_program *fp = brw->fragment_program;
-   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
-   struct intel_context *intel = &brw->intel;
+   const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
 
    memset(key, 0, sizeof(*key));
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       key->max_threads = 1;
    else {
       /* WM maximum threads is number of EUs times number of threads per EU. */
@@ -84,7 +83,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
    key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
    key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
-   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+   key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024);
 
    /* BRW_NEW_URB_FENCE */
    key->urb_size = brw->urb.vsize;
@@ -98,55 +97,60 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    /* CACHE_NEW_SAMPLER */
    key->sampler_count = brw->wm.sampler_count;
 
-   /* _NEW_POLYGONSTIPPLE */
-   key->polygon_stipple = ctx->Polygon.StippleFlag;
+   /* PIPE_NEW_RAST */
+   key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+   /* PIPE_NEW_FRAGMENT_SHADER */
+   key->uses_depth = fp->uses_depth;
+   key->computes_depth = fp->info.writes_z;
 
-   /* as far as we can tell */
-   key->computes_depth =
-      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
-   /* BRW_NEW_DEPTH_BUFFER
+   /* PIPE_NEW_DEPTH_BUFFER
+    *
     * Override for NULL depthbuffer case, required by the Pixel Shader Computed
     * Depth field.
     */
-   if (brw->state.depth_region == NULL)
+   if (brw->curr.fb.zsbuf == NULL)
       key->computes_depth = 0;
 
-   /* _NEW_COLOR */
-   key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
-   key->is_glsl = bfp->isGLSL;
+   /* PIPE_NEW_DEPTH_STENCIL_ALPHA */
+   key->uses_kill = (fp->info.uses_kill || 
+                    brw->curr.zstencil->cc3.alpha_test);
+
+   key->has_flow_control = fp->has_flow_control;
 
    /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   assert(fp->has_flow_control == 0);
+
+   /* PIPE_NEW_QUERY */
+   key->stats_wm = (brw->query.stats_wm != 0);
 
-   /* _NEW_DEPTH */
-   key->stats_wm = intel->stats_wm;
+   /* PIPE_NEW_RAST */
+   key->line_stipple = brw->curr.rast->templ.line_stipple_enable;
 
-   /* _NEW_LINE */
-   key->line_stipple = ctx->Line.StippleFlag;
 
-   /* _NEW_POLYGON */
-   key->offset_enable = ctx->Polygon.OffsetFill;
-   key->offset_units = ctx->Polygon.OffsetUnits;
-   key->offset_factor = ctx->Polygon.OffsetFactor;
+   key->offset_enable = (brw->curr.rast->templ.offset_cw ||
+                        brw->curr.rast->templ.offset_ccw);
+
+   key->offset_units = brw->curr.rast->templ.offset_units;
+   key->offset_factor = brw->curr.rast->templ.offset_scale;
 }
 
 /**
  * Setup wm hardware state.  See page 225 of Volume 2
  */
-static dri_bo *
+static enum pipe_error
 wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
-                       dri_bo **reloc_bufs)
+                       struct brw_winsys_reloc *reloc,
+                        unsigned nr_reloc,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_wm_unit_state wm;
-   dri_bo *bo;
+   enum pipe_error ret;
 
    memset(&wm, 0, sizeof(wm));
 
-   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+   wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   wm.thread0.kernel_start_pointer = 0; /* reloc */
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -156,8 +160,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
       wm.thread1.binding_table_entry_count = key->nr_surfaces;
 
    if (key->total_scratch != 0) {
-      wm.thread2.scratch_space_base_pointer =
-        brw->wm.scratch_bo->offset >> 10; /* reloc */
+      wm.thread2.scratch_space_base_pointer = 0; /* reloc */
       wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
    } else {
       wm.thread2.scratch_space_base_pointer = 0;
@@ -175,18 +178,14 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    else
       wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
 
-   if (brw->wm.sampler_bo != NULL) {
-      /* reloc */
-      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
-   } else {
-      wm.wm4.sampler_state_pointer = 0;
-   }
+   /* reloc */
+   wm.wm4.sampler_state_pointer = 0;
 
    wm.wm5.program_uses_depth = key->uses_depth;
    wm.wm5.program_computes_depth = key->computes_depth;
    wm.wm5.program_uses_killpixel = key->uses_kill;
 
-   if (key->is_glsl)
+   if (key->has_flow_control)
       wm.wm5.enable_8_pix = 1;
    else
       wm.wm5.enable_16_pix = 1;
@@ -217,51 +216,36 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    wm.wm5.line_stipple = key->line_stipple;
 
-   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+   if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
-                        key, sizeof(*key),
-                        reloc_bufs, 3,
-                        &wm, sizeof(wm),
-                        NULL, NULL);
-
-   /* Emit WM program relocation */
-   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION, 0,
-                    wm.thread0.grf_reg_count << 1,
-                    offsetof(struct brw_wm_unit_state, thread0),
-                    brw->wm.prog_bo);
+   ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+                          key, sizeof(*key),
+                          reloc, nr_reloc,
+                          &wm, sizeof(wm),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
-   /* Emit scratch space relocation */
-   if (key->total_scratch != 0) {
-      dri_bo_emit_reloc(bo,
-                       0, 0,
-                       wm.thread2.per_thread_scratch_space,
-                       offsetof(struct brw_wm_unit_state, thread2),
-                       brw->wm.scratch_bo);
-   }
-
-   /* Emit sampler state relocation */
-   if (key->sampler_count != 0) {
-      dri_bo_emit_reloc(bo,
-                       I915_GEM_DOMAIN_INSTRUCTION, 0,
-                       wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-                       offsetof(struct brw_wm_unit_state, wm4),
-                       brw->wm.sampler_bo);
-   }
-
-   return bo;
+   return PIPE_OK;
 }
 
 
-static void upload_wm_unit( struct brw_context *brw )
+static enum pipe_error upload_wm_unit( struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_key key;
-   dri_bo *reloc_bufs[3];
+   struct brw_winsys_reloc reloc[3];
+   unsigned nr_reloc = 0;
+   enum pipe_error ret;
+   unsigned grf_reg_count;
+   unsigned per_thread_scratch_space;
+   unsigned stats_enable;
+   unsigned sampler_count;
+
    wm_unit_populate_key(brw, &key);
 
+
    /* Allocate the necessary scratch space if we haven't already.  Don't
     * bother reducing the allocation later, since we use scratch so
     * rarely.
@@ -270,43 +254,81 @@ static void upload_wm_unit( struct brw_context *brw )
    if (key.total_scratch) {
       GLuint total = key.total_scratch * key.max_threads;
 
-      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-        dri_bo_unreference(brw->wm.scratch_bo);
-        brw->wm.scratch_bo = NULL;
-      }
+      /* Release the current scratch buffer if it is too small:
+       */
+      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) 
+        bo_reference(&brw->wm.scratch_bo, NULL);
+
       if (brw->wm.scratch_bo == NULL) {
-        brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
-                                           "wm scratch",
-                                           total,
-                                           4096);
+        ret = brw->sws->bo_alloc(brw->sws,
+                                  BRW_BUFFER_TYPE_SHADER_SCRATCH,
+                                  total,
+                                  4096,
+                                  &brw->wm.scratch_bo);
+         if (ret)
+            return ret;
       }
    }
 
-   reloc_bufs[0] = brw->wm.prog_bo;
-   reloc_bufs[1] = brw->wm.scratch_bo;
-   reloc_bufs[2] = brw->wm.sampler_bo;
-
-   dri_bo_unreference(brw->wm.state_bo);
-   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
-                                      &key, sizeof(key),
-                                      reloc_bufs, 3,
-                                      NULL);
-   if (brw->wm.state_bo == NULL) {
-      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+
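+   /* XXX: temporary: recompute the reloc deltas here; they mirror the
+    * fields wm_unit_create_from_key() stores in the unit state. */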
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+   per_thread_scratch_space = key.total_scratch / 1024 - 1;
+   stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
+   sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+
+   /* Emit WM program relocation */
+   make_reloc(&reloc[nr_reloc++],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_wm_unit_state, thread0),
+              brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (key.total_scratch != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_SCRATCH,
+                 per_thread_scratch_space,
+                 offsetof(struct brw_wm_unit_state, thread2),
+                 brw->wm.scratch_bo);
    }
+
+   /* Emit sampler state relocation */
+   if (key.sampler_count != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_STATE,
+                 stats_enable | (sampler_count << 2),
+                 offsetof(struct brw_wm_unit_state, wm4),
+                 brw->wm.sampler_bo);
+   }
+
+
+   if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
+                        &key, sizeof(key),
+                        reloc, nr_reloc,
+                        NULL,
+                        &brw->wm.state_bo))
+      return PIPE_OK;
+
+   ret = wm_unit_create_from_key(brw, &key, 
+                                 reloc, nr_reloc,
+                                 &brw->wm.state_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_wm_unit = {
    .dirty = {
-      .mesa = (_NEW_POLYGON | 
-              _NEW_POLYGONSTIPPLE | 
-              _NEW_LINE | 
-              _NEW_COLOR |
-              _NEW_DEPTH),
-
-      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
-             BRW_NEW_CURBE_OFFSETS |
-             BRW_NEW_DEPTH_BUFFER |
+      .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+              PIPE_NEW_DEPTH_BUFFER |
+              PIPE_NEW_RAST | 
+              PIPE_NEW_DEPTH_STENCIL_ALPHA |
+              PIPE_NEW_QUERY),
+
+      .brw = (BRW_NEW_CURBE_OFFSETS |
              BRW_NEW_NR_WM_SURFACES),
 
       .cache = (CACHE_NEW_WM_PROG |