i965: Enable L3 caching of buffer surfaces.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index e78f590a53dd6fd093e6f91b34635e623b8f4a06..52d96f40413aa2a7e0306d141547d36cca3d9b90 100644 (file)
@@ -98,7 +98,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_psp_urb_cbs,
 
    &brw_drawing_rect,
-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
    &brw_index_buffer,
    &brw_vertices,
 
@@ -108,7 +108,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
 static const struct brw_tracked_state *gen6_atoms[] =
 {
    &brw_vs_prog, /* must do before state base address */
-   &brw_ff_gs_prog, /* must do before state base address */
+   &brw_gs_prog, /* must do before state base address */
    &brw_wm_prog, /* must do before state base address */
 
    &gen6_clip_vp,
@@ -128,6 +128,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &gen6_depth_stencil_state,  /* must do before cc unit */
 
    &gen6_vs_push_constants, /* Before vs_state */
+   &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_state */
 
    /* Surface state setup.  Must come before the VS/WM unit.  The binding
@@ -135,6 +136,8 @@ static const struct brw_tracked_state *gen6_atoms[] =
     */
    &brw_vs_pull_constants,
    &brw_vs_ubo_surfaces,
+   &brw_gs_pull_constants,
+   &brw_gs_ubo_surfaces,
    &brw_wm_pull_constants,
    &brw_wm_ubo_surfaces,
    &gen6_renderbuffer_surfaces,
@@ -146,6 +149,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    &brw_fs_samplers,
    &brw_vs_samplers,
+   &brw_gs_samplers,
    &gen6_sampler_state,
    &gen6_multisample_state,
 
@@ -169,7 +173,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    &brw_drawing_rect,
 
-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
    &brw_index_buffer,
    &brw_vertices,
 };
@@ -186,7 +190,6 @@ static const struct brw_tracked_state *gen7_atoms[] =
    &brw_state_base_address,
 
    &brw_cc_vp,
-   &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
    &gen7_sf_clip_viewport,
 
    &gen7_push_constant_space,
@@ -196,7 +199,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
    &gen6_depth_stencil_state,  /* must do before cc unit */
 
    &gen6_vs_push_constants, /* Before vs_state */
-   &gen7_gs_push_constants, /* Before gs_state */
+   &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
    /* Surface state setup.  Must come before the VS/WM unit.  The binding
@@ -244,7 +247,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
 
    &brw_drawing_rect,
 
-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
    &brw_index_buffer,
    &brw_vertices,
 
@@ -258,19 +261,18 @@ static const struct brw_tracked_state *gen8_atoms[] =
    &brw_wm_prog,
 
    /* Command packets: */
-   &brw_state_base_address,
+   &gen8_state_base_address,
 
    &brw_cc_vp,
-   &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
-   &gen7_sf_clip_viewport,
+   &gen8_sf_clip_viewport,
 
    &gen7_push_constant_space,
    &gen7_urb,
-   &gen6_blend_state,
+   &gen8_blend_state,
    &gen6_color_calc_state,
 
    &gen6_vs_push_constants, /* Before vs_state */
-   &gen7_gs_push_constants, /* Before gs_state */
+   &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
    /* Surface state setup.  Must come before the VS/WM unit.  The binding
@@ -294,20 +296,21 @@ static const struct brw_tracked_state *gen8_atoms[] =
    &brw_fs_samplers,
    &brw_vs_samplers,
    &brw_gs_samplers,
-   &gen6_multisample_state,
+   &gen8_multisample_state,
 
-   &gen7_disable_stages,
+   &gen8_disable_stages,
    &gen8_vs_state,
-   &gen7_gs_state,
-   &gen7_sol_state,
-   &gen7_clip_state,
+   &gen8_gs_state,
+   &gen8_sol_state,
+   &gen6_clip_state,
    &gen8_raster_state,
    &gen8_sbe_state,
    &gen8_sf_state,
    &gen8_ps_blend,
+   &gen8_ps_extra,
+   &gen8_ps_state,
    &gen8_wm_depth_stencil,
-   &gen7_wm_state,
-   &gen7_ps_state,
+   &gen8_wm_state,
 
    &gen6_scissor_state,
 
@@ -321,11 +324,14 @@ static const struct brw_tracked_state *gen8_atoms[] =
 
    &brw_drawing_rect,
 
+   &gen8_vf_topology,
+
    &brw_indices,
-   &brw_index_buffer,
-   &brw_vertices,
+   &gen8_index_buffer,
+   &gen8_vertices,
 
    &haswell_cut_index,
+   &gen8_pma_fix,
 };
 
 static void
@@ -339,6 +345,10 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       return;
 
    brw_upload_invariant_state(brw);
+
+   if (brw->gen >= 8) {
+      gen8_emit_3dstate_sample_pattern(brw);
+   }
 }
 
 void brw_init_state( struct brw_context *brw )
@@ -347,6 +357,11 @@ void brw_init_state( struct brw_context *brw )
    const struct brw_tracked_state **atoms;
    int num_atoms;
 
+   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen7_atoms) <= ARRAY_SIZE(brw->atoms));
+   STATIC_ASSERT(ARRAY_SIZE(gen8_atoms) <= ARRAY_SIZE(brw->atoms));
+
    brw_init_caches(brw);
 
    if (brw->gen >= 8) {
@@ -363,13 +378,19 @@ void brw_init_state( struct brw_context *brw )
       num_atoms = ARRAY_SIZE(gen4_atoms);
    }
 
-   brw->atoms = atoms;
    brw->num_atoms = num_atoms;
 
+   /* This is to work around brw_context::atoms being declared const.  We want
+    * it to be const, but it needs to be initialized somehow!
+    */
+   struct brw_tracked_state *context_atoms =
+      (struct brw_tracked_state *) &brw->atoms[0];
+
+   for (int i = 0; i < num_atoms; i++)
+      context_atoms[i] = *atoms[i];
+
    while (num_atoms--) {
-      assert((*atoms)->dirty.mesa |
-            (*atoms)->dirty.brw |
-            (*atoms)->dirty.cache);
+      assert((*atoms)->dirty.mesa | (*atoms)->dirty.brw);
       assert((*atoms)->emit);
       atoms++;
    }
@@ -377,7 +398,12 @@ void brw_init_state( struct brw_context *brw )
    brw_upload_initial_gpu_state(brw);
 
    brw->state.dirty.mesa = ~0;
-   brw->state.dirty.brw = ~0;
+   brw->state.dirty.brw = ~0ull;
+
+   /* ~0 is a nonsensical value which won't match anything we program, so
+    * the programming will take effect on the first time around.
+    */
+   brw->pma_stall_bits = ~0;
 
    /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
     * dirty flags.
@@ -388,6 +414,7 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
 }
 
@@ -403,9 +430,7 @@ void brw_destroy_state( struct brw_context *brw )
 static bool
 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
 {
-   return ((a->mesa & b->mesa) |
-          (a->brw & b->brw) |
-          (a->cache & b->cache)) != 0;
+   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
 }
 
 static void accumulate_state( struct brw_state_flags *a,
@@ -413,7 +438,6 @@ static void accumulate_state( struct brw_state_flags *a,
 {
    a->mesa |= b->mesa;
    a->brw |= b->brw;
-   a->cache |= b->cache;
 }
 
 
@@ -423,11 +447,10 @@ static void xor_states( struct brw_state_flags *result,
 {
    result->mesa = a->mesa ^ b->mesa;
    result->brw = a->brw ^ b->brw;
-   result->cache = a->cache ^ b->cache;
 }
 
 struct dirty_bit_map {
-   uint32_t bit;
+   uint64_t bit;
    char *name;
    uint32_t count;
 };
@@ -464,11 +487,19 @@ static struct dirty_bit_map mesa_bits[] = {
    DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
    DEFINE_BIT(_NEW_BUFFER_OBJECT),
    DEFINE_BIT(_NEW_FRAG_CLAMP),
-   DEFINE_BIT(_NEW_VARYING_VP_INPUTS),
+   /* Avoid sign extension problems. */
+   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
    {0, 0, 0}
 };
 
 static struct dirty_bit_map brw_bits[] = {
+   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
    DEFINE_BIT(BRW_NEW_URB_FENCE),
    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
@@ -500,41 +531,21 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
    DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
+   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
+   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
+   DEFINE_BIT(BRW_NEW_CC_VP),
+   DEFINE_BIT(BRW_NEW_SF_VP),
+   DEFINE_BIT(BRW_NEW_CLIP_VP),
+   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
+   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
    {0, 0, 0}
 };
 
-static struct dirty_bit_map cache_bits[] = {
-   DEFINE_BIT(CACHE_NEW_CC_VP),
-   DEFINE_BIT(CACHE_NEW_CC_UNIT),
-   DEFINE_BIT(CACHE_NEW_WM_PROG),
-   DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG),
-   DEFINE_BIT(CACHE_NEW_BLORP_CONST_COLOR_PROG),
-   DEFINE_BIT(CACHE_NEW_SAMPLER),
-   DEFINE_BIT(CACHE_NEW_WM_UNIT),
-   DEFINE_BIT(CACHE_NEW_SF_PROG),
-   DEFINE_BIT(CACHE_NEW_SF_VP),
-   DEFINE_BIT(CACHE_NEW_SF_UNIT),
-   DEFINE_BIT(CACHE_NEW_VS_UNIT),
-   DEFINE_BIT(CACHE_NEW_VS_PROG),
-   DEFINE_BIT(CACHE_NEW_FF_GS_UNIT),
-   DEFINE_BIT(CACHE_NEW_FF_GS_PROG),
-   DEFINE_BIT(CACHE_NEW_GS_PROG),
-   DEFINE_BIT(CACHE_NEW_CLIP_VP),
-   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
-   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
-   {0, 0, 0}
-};
-
-
 static void
-brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
 {
-   int i;
-
-   for (i = 0; i < 32; i++) {
-      if (bit_map[i].bit == 0)
-        return;
-
+   for (int i = 0; bit_map[i].bit != 0; i++) {
       if (bit_map[i].bit & bits)
         bit_map[i].count++;
    }
@@ -543,14 +554,11 @@ brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
 static void
 brw_print_dirty_count(struct dirty_bit_map *bit_map)
 {
-   int i;
-
-   for (i = 0; i < 32; i++) {
-      if (bit_map[i].bit == 0)
-        return;
-
-      fprintf(stderr, "0x%08x: %12d (%s)\n",
-             bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+   for (int i = 0; bit_map[i].bit != 0; i++) {
+      if (bit_map[i].count > 1) {
+         fprintf(stderr, "0x%016lx: %12d (%s)\n",
+                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+      }
    }
 }
 
@@ -573,8 +581,7 @@ void brw_upload_state(struct brw_context *brw)
    if (0) {
       /* Always re-emit all state. */
       state->mesa |= ~0;
-      state->brw |= ~0;
-      state->cache |= ~0;
+      state->brw |= ~0ull;
    }
 
    if (brw->fragment_program != ctx->FragmentProgram._Current) {
@@ -597,10 +604,13 @@ void brw_upload_state(struct brw_context *brw)
       brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS;
    }
 
-   if ((state->mesa | state->cache | state->brw) == 0)
-      return;
+   if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
+      brw->num_samples = ctx->DrawBuffer->Visual.samples;
+      brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES;
+   }
 
-   intel_check_front_buffer_rendering(brw);
+   if ((state->mesa | state->brw) == 0)
+      return;
 
    if (unlikely(INTEL_DEBUG)) {
       /* Debug version which enforces various sanity checks on the
@@ -612,7 +622,7 @@ void brw_upload_state(struct brw_context *brw)
       prev = *state;
 
       for (i = 0; i < brw->num_atoms; i++) {
-        const struct brw_tracked_state *atom = brw->atoms[i];
+        const struct brw_tracked_state *atom = &brw->atoms[i];
         struct brw_state_flags generated;
 
         if (check_state(state, &atom->dirty)) {
@@ -632,7 +642,7 @@ void brw_upload_state(struct brw_context *brw)
    }
    else {
       for (i = 0; i < brw->num_atoms; i++) {
-        const struct brw_tracked_state *atom = brw->atoms[i];
+        const struct brw_tracked_state *atom = &brw->atoms[i];
 
         if (check_state(state, &atom->dirty)) {
            atom->emit(brw);
@@ -642,15 +652,12 @@ void brw_upload_state(struct brw_context *brw)
 
    if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
       STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
-      STATIC_ASSERT(ARRAY_SIZE(cache_bits) == BRW_MAX_CACHE + 1);
 
       brw_update_dirty_count(mesa_bits, state->mesa);
       brw_update_dirty_count(brw_bits, state->brw);
-      brw_update_dirty_count(cache_bits, state->cache);
       if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
-        brw_print_dirty_count(cache_bits);
         fprintf(stderr, "\n");
       }
    }