i965/fs: Lower 32x32 bit multiplication on BXT.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index ab316bfc431248314cba3aa5c41826ab795fe67c..6096b4946a00d7b6d20041a8130881426f137a74 100644 (file)
@@ -40,6 +40,8 @@
 #include "brw_ff_gs.h"
 #include "brw_gs.h"
 #include "brw_wm.h"
+#include "brw_cs.h"
+#include "main/framebuffer.h"
 
 static const struct brw_tracked_state *gen4_atoms[] =
 {
@@ -190,6 +192,8 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -248,6 +252,9 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
+   &brw_state_base_address,
+   &brw_cs_abo_surfaces,
+   &brw_cs_state,
 };
 
 static const struct brw_tracked_state *gen8_render_atoms[] =
@@ -263,6 +270,8 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &gen8_blend_state,
    &gen6_color_calc_state,
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -328,6 +337,9 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
+   &gen8_state_base_address,
+   &brw_cs_abo_surfaces,
+   &brw_cs_state,
 };
 
 static void
@@ -341,7 +353,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       return;
 
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_invariant_state(brw);
 
@@ -401,6 +413,9 @@ void brw_init_state( struct brw_context *brw )
 {
    struct gl_context *ctx = &brw->ctx;
 
+   /* Force the first brw_select_pipeline to emit pipeline select */
+   brw->last_pipeline = BRW_NUM_PIPELINES;
+
    STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
    STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
    STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
@@ -541,6 +556,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_URB_FENCE),
    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
@@ -580,6 +596,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CLIP_VP),
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
+   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
    {0, 0, 0}
 };
 
@@ -616,6 +633,8 @@ brw_upload_programs(struct brw_context *brw,
          brw_upload_gs_prog(brw);
 
       brw_upload_wm_prog(brw);
+   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
+      brw_upload_cs_prog(brw);
    }
 }
 
@@ -646,6 +665,9 @@ brw_upload_pipeline_state(struct brw_context *brw,
    int i;
    static int dirty_count = 0;
    struct brw_state_flags state = brw->state.pipelines[pipeline];
+   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
+
+   brw_select_pipeline(brw, pipeline);
 
    if (0) {
       /* Always re-emit all state. */
@@ -670,13 +692,18 @@ brw_upload_pipeline_state(struct brw_context *brw,
       }
    }
 
+   if (brw->compute_program != ctx->ComputeProgram._Current) {
+      brw->compute_program = ctx->ComputeProgram._Current;
+      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
+   }
+
    if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
       brw->meta_in_progress = _mesa_meta_in_progress(ctx);
       brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
    }
 
-   if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
-      brw->num_samples = ctx->DrawBuffer->Visual.samples;
+   if (brw->num_samples != fb_samples) {
+      brw->num_samples = fb_samples;
       brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
    }
 
@@ -687,7 +714,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
 
    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_programs(brw, pipeline);
    merge_ctx_state(brw, &state);