i965: Implement ABO surface state emission.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index 40563ec64d7d403552b7f5f5469e4e89132e0fdc..666af34ee50492d5d85b58d8aa9034d53819e659 100644 (file)
 
 #include "brw_context.h"
 #include "brw_state.h"
+#include "drivers/common/meta.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
 
-/* This is used to initialize brw->state.atoms[].  We could use this
- * list directly except for a single atom, brw_constant_buffer, which
- * has a .dirty value which changes according to the parameters of the
- * current fragment and vertex programs, and so cannot be a static
- * value.
- */
 static const struct brw_tracked_state *gen4_atoms[] =
 {
-   &brw_check_fallback,
-
-   &brw_wm_input_sizes,
    &brw_vs_prog, /* must do before GS prog, state base address. */
-   &brw_gs_prog, /* must do before state base address */
+   &brw_ff_gs_prog, /* must do before state base address */
+
+   &brw_interpolation_map,
+
    &brw_clip_prog, /* must do before state base address */
    &brw_sf_prog, /* must do before state base address */
    &brw_wm_prog, /* must do before state base address */
@@ -63,14 +58,18 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_cc_vp,
    &brw_cc_unit,
 
-   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
-   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
-
-   &brw_vs_surfaces,           /* must do before unit */
-   &brw_wm_constant_surface,   /* must do before wm surfaces/bind bo */
-   &brw_wm_surfaces,           /* must do before samplers and unit */
+   /* Surface state setup.  Must come before the VS/WM unit.  The binding
+    * table upload must be last.
+    */
+   &brw_vs_pull_constants,
+   &brw_wm_pull_constants,
+   &brw_renderbuffer_surfaces,
+   &brw_texture_surfaces,
+   &brw_vs_binding_table,
    &brw_wm_binding_table,
-   &brw_wm_samplers,
+
+   &brw_fs_samplers,
+   &brw_vs_samplers,
 
    /* These set up state for brw_psp_urb_cbs */
    &brw_wm_unit,
@@ -82,7 +81,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
 
    /* Command packets:
     */
-   &brw_invarient_state,
+   &brw_invariant_state,
    &brw_state_base_address,
 
    &brw_binding_table_pointers,
@@ -108,18 +107,14 @@ static const struct brw_tracked_state *gen4_atoms[] =
 
 static const struct brw_tracked_state *gen6_atoms[] =
 {
-   &brw_check_fallback,
-
-   &brw_wm_input_sizes,
    &brw_vs_prog, /* must do before state base address */
-   &brw_gs_prog, /* must do before state base address */
+   &brw_ff_gs_prog, /* must do before state base address */
    &brw_wm_prog, /* must do before state base address */
 
    &gen6_clip_vp,
    &gen6_sf_vp,
 
    /* Command packets: */
-   &brw_invarient_state,
 
    /* must do before binding table pointers, cc state ptrs */
    &brw_state_base_address,
@@ -131,20 +126,28 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &gen6_blend_state,          /* must do before cc unit */
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
-   &gen6_cc_state_pointers,
 
-   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
-   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_wm_push_constants, /* Before wm_state */
 
-   &brw_vs_surfaces,           /* must do before unit */
-   &brw_wm_constant_surface,   /* must do before wm surfaces/bind bo */
-   &brw_wm_surfaces,           /* must do before samplers and unit */
+   /* Surface state setup.  Must come before the VS/WM unit.  The binding
+    * table upload must be last.
+    */
+   &brw_vs_pull_constants,
+   &brw_vs_ubo_surfaces,
+   &brw_wm_pull_constants,
+   &brw_wm_ubo_surfaces,
+   &gen6_renderbuffer_surfaces,
+   &brw_texture_surfaces,
+   &gen6_sol_surface,
+   &brw_vs_binding_table,
+   &gen6_gs_binding_table,
    &brw_wm_binding_table,
 
-   &brw_wm_samplers,
+   &brw_fs_samplers,
+   &brw_vs_samplers,
    &gen6_sampler_state,
+   &gen6_multisample_state,
 
    &gen6_vs_state,
    &gen6_gs_state,
@@ -171,17 +174,13 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_vertices,
 };
 
-const struct brw_tracked_state *gen7_atoms[] =
+static const struct brw_tracked_state *gen7_atoms[] =
 {
-   &brw_check_fallback,
-
-   &brw_wm_input_sizes,
    &brw_vs_prog,
    &brw_gs_prog,
    &brw_wm_prog,
 
    /* Command packets: */
-   &brw_invarient_state,
 
    /* must do before binding table pointers, cc state ptrs */
    &brw_state_base_address,
@@ -190,28 +189,43 @@ const struct brw_tracked_state *gen7_atoms[] =
    &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
    &gen7_sf_clip_viewport,
 
+   &gen7_push_constant_space,
    &gen7_urb,
    &gen6_blend_state,          /* must do before cc unit */
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
-   &gen7_blend_state_pointer,
-   &gen7_cc_state_pointer,
-   &gen7_depth_stencil_state_pointer,
 
-   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
-   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
    &gen6_vs_push_constants, /* Before vs_state */
+   &gen7_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
-   &brw_vs_surfaces,           /* must do before unit */
-   &brw_wm_constant_surface,   /* must do before wm surfaces/bind bo */
-   &brw_wm_surfaces,           /* must do before samplers and unit */
+   /* Surface state setup.  Must come before the VS/WM unit.  The binding
+    * table upload must be last.
+    */
+   &brw_vs_pull_constants,
+   &brw_vs_ubo_surfaces,
+   &brw_vs_abo_surfaces,
+   &brw_gs_pull_constants,
+   &brw_gs_ubo_surfaces,
+   &brw_gs_abo_surfaces,
+   &brw_wm_pull_constants,
+   &brw_wm_ubo_surfaces,
+   &brw_wm_abo_surfaces,
+   &gen6_renderbuffer_surfaces,
+   &brw_texture_surfaces,
+   &brw_vs_binding_table,
+   &brw_gs_binding_table,
    &brw_wm_binding_table,
 
-   &gen7_samplers,
+   &brw_fs_samplers,
+   &brw_vs_samplers,
+   &brw_gs_samplers,
+   &gen6_multisample_state,
 
    &gen7_disable_stages,
    &gen7_vs_state,
+   &gen7_gs_state,
+   &gen7_sol_state,
    &gen7_clip_state,
    &gen7_sbe_state,
    &gen7_sf_state,
@@ -233,20 +247,35 @@ const struct brw_tracked_state *gen7_atoms[] =
    &brw_indices,
    &brw_index_buffer,
    &brw_vertices,
+
+   &haswell_cut_index,
 };
 
+static void
+brw_upload_initial_gpu_state(struct brw_context *brw)
+{
+   /* On platforms with hardware contexts, we can set our initial GPU state
+    * right away rather than doing it via state atoms.  This saves a small
+    * amount of overhead on every draw call.
+    */
+   if (!brw->hw_ctx)
+      return;
+
+   brw_upload_invariant_state(brw);
+}
 
 void brw_init_state( struct brw_context *brw )
 {
+   struct gl_context *ctx = &brw->ctx;
    const struct brw_tracked_state **atoms;
    int num_atoms;
 
    brw_init_caches(brw);
 
-   if (brw->intel.gen >= 7) {
+   if (brw->gen >= 7) {
       atoms = gen7_atoms;
       num_atoms = ARRAY_SIZE(gen7_atoms);
-   } else if (brw->intel.gen == 6) {
+   } else if (brw->gen == 6) {
       atoms = gen6_atoms;
       num_atoms = ARRAY_SIZE(gen6_atoms);
    } else {
@@ -264,6 +293,21 @@ void brw_init_state( struct brw_context *brw )
       assert((*atoms)->emit);
       atoms++;
    }
+
+   brw_upload_initial_gpu_state(brw);
+
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
+    * dirty flags.
+    */
+   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
+
+   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
+   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
+   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
 }
 
 
@@ -275,8 +319,8 @@ void brw_destroy_state( struct brw_context *brw )
 /***********************************************************************
  */
 
-static GLuint check_state( const struct brw_state_flags *a,
-                          const struct brw_state_flags *b )
+static bool
+check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
 {
    return ((a->mesa & b->mesa) |
           (a->brw & b->brw) |
@@ -329,48 +373,61 @@ static struct dirty_bit_map mesa_bits[] = {
    DEFINE_BIT(_NEW_TEXTURE),
    DEFINE_BIT(_NEW_TRANSFORM),
    DEFINE_BIT(_NEW_VIEWPORT),
-   DEFINE_BIT(_NEW_PACKUNPACK),
    DEFINE_BIT(_NEW_ARRAY),
    DEFINE_BIT(_NEW_RENDERMODE),
    DEFINE_BIT(_NEW_BUFFERS),
+   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
    DEFINE_BIT(_NEW_MULTISAMPLE),
    DEFINE_BIT(_NEW_TRACK_MATRIX),
    DEFINE_BIT(_NEW_PROGRAM),
    DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   DEFINE_BIT(_NEW_BUFFER_OBJECT),
+   DEFINE_BIT(_NEW_FRAG_CLAMP),
+   DEFINE_BIT(_NEW_VARYING_VP_INPUTS),
    {0, 0, 0}
 };
 
 static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_URB_FENCE),
    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
    DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
-   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
    DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
    DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
    DEFINE_BIT(BRW_NEW_PRIMITIVE),
    DEFINE_BIT(BRW_NEW_CONTEXT),
-   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
-   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_PSP),
-   DEFINE_BIT(BRW_NEW_WM_SURFACES),
+   DEFINE_BIT(BRW_NEW_SURFACES),
+   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
    DEFINE_BIT(BRW_NEW_INDICES),
-   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
-   DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
-   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
+   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
+   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
+   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
+   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
+   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
+   DEFINE_BIT(BRW_NEW_STATS_WM),
+   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
+   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
+   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
+   DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
+   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
    {0, 0, 0}
 };
 
 static struct dirty_bit_map cache_bits[] = {
-   DEFINE_BIT(CACHE_NEW_BLEND_STATE),
    DEFINE_BIT(CACHE_NEW_CC_VP),
    DEFINE_BIT(CACHE_NEW_CC_UNIT),
    DEFINE_BIT(CACHE_NEW_WM_PROG),
+   DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG),
+   DEFINE_BIT(CACHE_NEW_BLORP_CONST_COLOR_PROG),
    DEFINE_BIT(CACHE_NEW_SAMPLER),
    DEFINE_BIT(CACHE_NEW_WM_UNIT),
    DEFINE_BIT(CACHE_NEW_SF_PROG),
@@ -378,7 +435,8 @@ static struct dirty_bit_map cache_bits[] = {
    DEFINE_BIT(CACHE_NEW_SF_UNIT),
    DEFINE_BIT(CACHE_NEW_VS_UNIT),
    DEFINE_BIT(CACHE_NEW_VS_PROG),
-   DEFINE_BIT(CACHE_NEW_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_FF_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_FF_GS_PROG),
    DEFINE_BIT(CACHE_NEW_GS_PROG),
    DEFINE_BIT(CACHE_NEW_CLIP_VP),
    DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
@@ -402,7 +460,7 @@ brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
 }
 
 static void
-brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+brw_print_dirty_count(struct dirty_bit_map *bit_map)
 {
    int i;
 
@@ -420,16 +478,19 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
  */
 void brw_upload_state(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &brw->ctx;
    struct brw_state_flags *state = &brw->state.dirty;
    int i;
    static int dirty_count = 0;
 
-   state->mesa |= brw->intel.NewGLState;
-   brw->intel.NewGLState = 0;
+   state->mesa |= brw->NewGLState;
+   brw->NewGLState = 0;
+
+   state->brw |= ctx->NewDriverState;
+   ctx->NewDriverState = 0;
 
-   if (brw->emit_state_always) {
+   if (0) {
+      /* Always re-emit all state. */
       state->mesa |= ~0;
       state->brw |= ~0;
       state->cache |= ~0;
@@ -440,17 +501,25 @@ void brw_upload_state(struct brw_context *brw)
       brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
    }
 
+   if (brw->geometry_program != ctx->GeometryProgram._Current) {
+      brw->geometry_program = ctx->GeometryProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_GEOMETRY_PROGRAM;
+   }
+
    if (brw->vertex_program != ctx->VertexProgram._Current) {
       brw->vertex_program = ctx->VertexProgram._Current;
       brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
    }
 
+   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
+      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
+      brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS;
+   }
+
    if ((state->mesa | state->cache | state->brw) == 0)
       return;
 
-   brw->intel.Fallback = false; /* boolean, not bitfield */
-
-   intel_check_front_buffer_rendering(intel);
+   intel_check_front_buffer_rendering(brw);
 
    if (unlikely(INTEL_DEBUG)) {
       /* Debug version which enforces various sanity checks on the
@@ -465,9 +534,6 @@ void brw_upload_state(struct brw_context *brw)
         const struct brw_tracked_state *atom = brw->atoms[i];
         struct brw_state_flags generated;
 
-        if (brw->intel.Fallback)
-           break;
-
         if (check_state(state, &atom->dirty)) {
            atom->emit(brw);
         }
@@ -487,9 +553,6 @@ void brw_upload_state(struct brw_context *brw)
       for (i = 0; i < brw->num_atoms; i++) {
         const struct brw_tracked_state *atom = brw->atoms[i];
 
-        if (brw->intel.Fallback)
-           break;
-
         if (check_state(state, &atom->dirty)) {
            atom->emit(brw);
         }
@@ -497,17 +560,19 @@ void brw_upload_state(struct brw_context *brw)
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
+      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
+      STATIC_ASSERT(ARRAY_SIZE(cache_bits) == BRW_MAX_CACHE + 1);
+
       brw_update_dirty_count(mesa_bits, state->mesa);
       brw_update_dirty_count(brw_bits, state->brw);
       brw_update_dirty_count(cache_bits, state->cache);
       if (dirty_count++ % 1000 == 0) {
-        brw_print_dirty_count(mesa_bits, state->mesa);
-        brw_print_dirty_count(brw_bits, state->brw);
-        brw_print_dirty_count(cache_bits, state->cache);
+        brw_print_dirty_count(mesa_bits);
+        brw_print_dirty_count(brw_bits);
+        brw_print_dirty_count(cache_bits);
         fprintf(stderr, "\n");
       }
    }
 
-   if (!brw->intel.Fallback)
-      memset(state, 0, sizeof(*state));
+   memset(state, 0, sizeof(*state));
 }