i965/nir/vec4: Prepare source and destination registers for ALU operations
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index f24fcc730e507dc23ce7c7cea0e73861f36fc24e..6096b4946a00d7b6d20041a8130881426f137a74 100644 (file)
@@ -40,6 +40,8 @@
 #include "brw_ff_gs.h"
 #include "brw_gs.h"
 #include "brw_wm.h"
+#include "brw_cs.h"
+#include "main/framebuffer.h"
 
 static const struct brw_tracked_state *gen4_atoms[] =
 {
@@ -190,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -246,6 +250,13 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &haswell_cut_index,
 };
 
+static const struct brw_tracked_state *gen7_compute_atoms[] =
+{
+   &brw_state_base_address,
+   &brw_cs_abo_surfaces,
+   &brw_cs_state,
+};
+
 static const struct brw_tracked_state *gen8_render_atoms[] =
 {
    /* Command packets: */
@@ -259,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &gen8_blend_state,
    &gen6_color_calc_state,
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -322,6 +335,13 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &gen8_pma_fix,
 };
 
+static const struct brw_tracked_state *gen8_compute_atoms[] =
+{
+   &gen8_state_base_address,
+   &brw_cs_abo_surfaces,
+   &brw_cs_state,
+};
+
 static void
 brw_upload_initial_gpu_state(struct brw_context *brw)
 {
@@ -333,7 +353,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       return;
 
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_invariant_state(brw);
 
@@ -359,8 +379,10 @@ brw_get_pipeline_atoms(struct brw_context *brw,
    switch (pipeline) {
    case BRW_RENDER_PIPELINE:
       return brw->render_atoms;
+   case BRW_COMPUTE_PIPELINE:
+      return brw->compute_atoms;
    default:
-      STATIC_ASSERT(BRW_NUM_PIPELINES == 1);
+      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
       unreachable("Unsupported pipeline");
       return NULL;
    }
@@ -391,12 +413,19 @@ void brw_init_state( struct brw_context *brw )
 {
    struct gl_context *ctx = &brw->ctx;
 
+   /* Force the first brw_select_pipeline to emit pipeline select */
+   brw->last_pipeline = BRW_NUM_PIPELINES;
+
    STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
    STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
    STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
                  ARRAY_SIZE(brw->render_atoms));
    STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
                  ARRAY_SIZE(brw->render_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
+                 ARRAY_SIZE(brw->compute_atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
+                 ARRAY_SIZE(brw->compute_atoms));
 
    brw_init_caches(brw);
 
@@ -404,10 +433,16 @@ void brw_init_state( struct brw_context *brw )
       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                               gen8_render_atoms,
                               ARRAY_SIZE(gen8_render_atoms));
+      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
+                              gen8_compute_atoms,
+                              ARRAY_SIZE(gen8_compute_atoms));
    } else if (brw->gen == 7) {
       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                               gen7_render_atoms,
                               ARRAY_SIZE(gen7_render_atoms));
+      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
+                              gen7_compute_atoms,
+                              ARRAY_SIZE(gen7_compute_atoms));
    } else if (brw->gen == 6) {
       brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                               gen6_atoms, ARRAY_SIZE(gen6_atoms));
@@ -418,18 +453,18 @@ void brw_init_state( struct brw_context *brw )
 
    brw_upload_initial_gpu_state(brw);
 
-   brw->state.dirty.mesa = ~0;
-   brw->state.dirty.brw = ~0ull;
+   brw->NewGLState = ~0;
+   brw->ctx.NewDriverState = ~0ull;
 
    /* ~0 is a nonsensical value which won't match anything we program, so
     * the programming will take effect on the first time around.
     */
    brw->pma_stall_bits = ~0;
 
-   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
+   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
     * dirty flags.
     */
-   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
+   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
 
    ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
@@ -521,6 +556,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_URB_FENCE),
    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
@@ -560,6 +596,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CLIP_VP),
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
+   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
    {0, 0, 0}
 };
 
@@ -596,6 +633,8 @@ brw_upload_programs(struct brw_context *brw,
          brw_upload_gs_prog(brw);
 
       brw_upload_wm_prog(brw);
+   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
+      brw_upload_cs_prog(brw);
    }
 }
 
@@ -603,8 +642,8 @@ static inline void
 merge_ctx_state(struct brw_context *brw,
                 struct brw_state_flags *state)
 {
-   state->mesa |= brw->state.dirty.mesa;
-   state->brw |= brw->state.dirty.brw;
+   state->mesa |= brw->NewGLState;
+   state->brw |= brw->ctx.NewDriverState;
 }
 
 static inline void
@@ -623,48 +662,49 @@ brw_upload_pipeline_state(struct brw_context *brw,
                           enum brw_pipeline pipeline)
 {
    struct gl_context *ctx = &brw->ctx;
-   struct brw_state_flags *brw_state = &brw->state.dirty;
    int i;
    static int dirty_count = 0;
    struct brw_state_flags state = brw->state.pipelines[pipeline];
+   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
 
-   brw_state->mesa |= brw->NewGLState;
-   brw->NewGLState = 0;
-
-   brw_state->brw |= ctx->NewDriverState;
-   ctx->NewDriverState = 0;
+   brw_select_pipeline(brw, pipeline);
 
    if (0) {
       /* Always re-emit all state. */
-      brw_state->mesa |= ~0;
-      brw_state->brw |= ~0ull;
+      brw->NewGLState = ~0;
+      ctx->NewDriverState = ~0ull;
    }
 
    if (pipeline == BRW_RENDER_PIPELINE) {
       if (brw->fragment_program != ctx->FragmentProgram._Current) {
          brw->fragment_program = ctx->FragmentProgram._Current;
-         brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
       }
 
       if (brw->geometry_program != ctx->GeometryProgram._Current) {
          brw->geometry_program = ctx->GeometryProgram._Current;
-         brw->state.dirty.brw |= BRW_NEW_GEOMETRY_PROGRAM;
+         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
       }
 
       if (brw->vertex_program != ctx->VertexProgram._Current) {
          brw->vertex_program = ctx->VertexProgram._Current;
-         brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
       }
    }
 
+   if (brw->compute_program != ctx->ComputeProgram._Current) {
+      brw->compute_program = ctx->ComputeProgram._Current;
+      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
+   }
+
    if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
       brw->meta_in_progress = _mesa_meta_in_progress(ctx);
-      brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS;
+      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
    }
 
-   if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
-      brw->num_samples = ctx->DrawBuffer->Visual.samples;
-      brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES;
+   if (brw->num_samples != fb_samples) {
+      brw->num_samples = fb_samples;
+      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
    }
 
    /* Exit early if no state is flagged as dirty */
@@ -674,7 +714,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
 
    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_programs(brw, pipeline);
    merge_ctx_state(brw, &state);
@@ -742,19 +782,18 @@ static inline void
 brw_pipeline_state_finished(struct brw_context *brw,
                             enum brw_pipeline pipeline)
 {
-   struct brw_state_flags *state = &brw->state.dirty;
-
    /* Save all dirty state into the other pipelines */
    for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
       if (i != pipeline) {
-         brw->state.pipelines[i].mesa |= state->mesa;
-         brw->state.pipelines[i].brw |= state->brw;
+         brw->state.pipelines[i].mesa |= brw->NewGLState;
+         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
       } else {
          memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
       }
    }
 
-   memset(state, 0, sizeof(*state));
+   brw->NewGLState = 0;
+   brw->ctx.NewDriverState = 0ull;
 }
 
 /**
@@ -770,3 +809,15 @@ brw_render_state_finished(struct brw_context *brw)
 {
    brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
 }
+
+void
+brw_upload_compute_state(struct brw_context *brw)
+{
+   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
+}
+
+void
+brw_compute_state_finished(struct brw_context *brw)
+{
+   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
+}
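
The final hunk adds the two compute-pipeline entry points, brw_upload_compute_state() and brw_compute_state_finished(), which mirror the existing render-pipeline pair. Below is a minimal sketch (not code from this commit) of how a compute dispatch path would be expected to bracket command emission with these entry points; the helper brw_emit_gpgpu_walker() and the exact dispatch signature are assumptions for illustration only.

   /* Hypothetical dispatch path, assuming the conventions visible in the
    * diff above: select/upload compute state, emit the walker, then let the
    * state tracker propagate dirty bits to the other pipeline.
    */
   static void
   example_dispatch_compute(struct gl_context *ctx)
   {
      struct brw_context *brw = brw_context(ctx);

      /* Re-emits any state flagged dirty for BRW_COMPUTE_PIPELINE; per the
       * diff this is brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE),
       * which also calls brw_select_pipeline() and brw_upload_cs_prog(). */
      brw_upload_compute_state(brw);

      /* Emit the actual compute walker once state is in place
       * (assumed helper, named here only for illustration). */
      brw_emit_gpgpu_walker(brw);

      /* Clears NewGLState/NewDriverState for this pipeline and saves the
       * dirty bits into the render pipeline so shared state is re-emitted
       * on its next use, as brw_pipeline_state_finished() does above. */
      brw_compute_state_finished(brw);
   }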