i965: Enable ARB_stencil_texturing for Haswell
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
index b2ca9c2c0e7be2c851b486c4179fab8df6dd1909..69acf3b203632ab1adced8dd8129609e6e673709 100644 (file)
 
 static const struct brw_tracked_state *gen4_atoms[] =
 {
-   &brw_interpolation_map,
-
-   &brw_clip_prog, /* must do before state base address */
-   &brw_sf_prog, /* must do before state base address */
-
    /* Once all the programs are done, we know how large urb entry
     * sizes need to be and can decide if we need to change the urb
     * layout.
@@ -66,6 +61,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_vs_pull_constants,
    &brw_wm_pull_constants,
    &brw_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
    &brw_texture_surfaces,
    &brw_vs_binding_table,
    &brw_wm_binding_table,
@@ -84,7 +80,6 @@ static const struct brw_tracked_state *gen4_atoms[] =
    /* Command packets:
     */
    &brw_invariant_state,
-   &brw_state_base_address,
 
    &brw_binding_table_pointers,
    &brw_blend_constant_color,
@@ -114,9 +109,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
 
    /* Command packets: */
 
-   /* must do before binding table pointers, cc state ptrs */
-   &brw_state_base_address,
-
    &brw_cc_vp,
    &gen6_viewport_state,       /* must do after *_vp stages */
 
@@ -139,6 +131,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_wm_pull_constants,
    &brw_wm_ubo_surfaces,
    &gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
    &brw_texture_surfaces,
    &gen6_sol_surface,
    &brw_vs_binding_table,
@@ -180,12 +173,10 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 {
    /* Command packets: */
 
-   /* must do before binding table pointers, cc state ptrs */
-   &brw_state_base_address,
-
    &brw_cc_vp,
    &gen7_sf_clip_viewport,
 
+   &gen7_l3_state,
    &gen7_push_constant_space,
    &gen7_urb,
    &gen6_blend_state,          /* must do before cc unit */
@@ -195,10 +186,14 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
 
    &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
+   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
    &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
    &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
 
    &gen6_vs_push_constants, /* Before vs_state */
+   &gen7_tcs_push_constants,
+   &gen7_tes_push_constants,
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
@@ -208,6 +203,12 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &brw_vs_pull_constants,
    &brw_vs_ubo_surfaces,
    &brw_vs_abo_surfaces,
+   &brw_tcs_pull_constants,
+   &brw_tcs_ubo_surfaces,
+   &brw_tcs_abo_surfaces,
+   &brw_tes_pull_constants,
+   &brw_tes_ubo_surfaces,
+   &brw_tes_abo_surfaces,
    &brw_gs_pull_constants,
    &brw_gs_ubo_surfaces,
    &brw_gs_abo_surfaces,
@@ -215,18 +216,25 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
    &brw_wm_ubo_surfaces,
    &brw_wm_abo_surfaces,
    &gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
    &brw_texture_surfaces,
    &brw_vs_binding_table,
+   &brw_tcs_binding_table,
+   &brw_tes_binding_table,
    &brw_gs_binding_table,
    &brw_wm_binding_table,
 
    &brw_fs_samplers,
    &brw_vs_samplers,
+   &brw_tcs_samplers,
+   &brw_tes_samplers,
    &brw_gs_samplers,
    &gen6_multisample_state,
 
-   &gen7_disable_stages,
    &gen7_vs_state,
+   &gen7_hs_state,
+   &gen7_te_state,
+   &gen7_ds_state,
    &gen7_gs_state,
    &gen7_sol_state,
    &gen7_clip_state,
@@ -256,21 +264,24 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
 
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
-   &brw_state_base_address,
+   &gen7_l3_state,
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
+   &brw_cs_pull_constants,
+   &brw_cs_ubo_surfaces,
    &brw_cs_abo_surfaces,
+   &brw_cs_texture_surfaces,
+   &brw_cs_work_groups_surface,
+   &brw_cs_samplers,
    &brw_cs_state,
 };
 
 static const struct brw_tracked_state *gen8_render_atoms[] =
 {
-   /* Command packets: */
-   &gen8_state_base_address,
-
    &brw_cc_vp,
    &gen8_sf_clip_viewport,
 
+   &gen7_l3_state,
    &gen7_push_constant_space,
    &gen7_urb,
    &gen8_blend_state,
@@ -279,10 +290,14 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
 
    &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
+   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
    &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
    &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
 
    &gen6_vs_push_constants, /* Before vs_state */
+   &gen7_tcs_push_constants,
+   &gen7_tes_push_constants,
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
@@ -292,6 +307,12 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &brw_vs_pull_constants,
    &brw_vs_ubo_surfaces,
    &brw_vs_abo_surfaces,
+   &brw_tcs_pull_constants,
+   &brw_tcs_ubo_surfaces,
+   &brw_tcs_abo_surfaces,
+   &brw_tes_pull_constants,
+   &brw_tes_ubo_surfaces,
+   &brw_tes_abo_surfaces,
    &brw_gs_pull_constants,
    &brw_gs_ubo_surfaces,
    &brw_gs_abo_surfaces,
@@ -299,20 +320,28 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
    &brw_wm_ubo_surfaces,
    &brw_wm_abo_surfaces,
    &gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
    &brw_texture_surfaces,
    &brw_vs_binding_table,
+   &brw_tcs_binding_table,
+   &brw_tes_binding_table,
    &brw_gs_binding_table,
    &brw_wm_binding_table,
 
    &brw_fs_samplers,
    &brw_vs_samplers,
+   &brw_tcs_samplers,
+   &brw_tes_samplers,
    &brw_gs_samplers,
    &gen8_multisample_state,
 
    &gen8_disable_stages,
    &gen8_vs_state,
+   &gen8_hs_state,
+   &gen7_te_state,
+   &gen8_ds_state,
    &gen8_gs_state,
-   &gen8_sol_state,
+   &gen7_sol_state,
    &gen6_clip_state,
    &gen8_raster_state,
    &gen8_sbe_state,
@@ -347,10 +376,15 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
 
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
-   &gen8_state_base_address,
+   &gen7_l3_state,
    &brw_cs_image_surfaces,
    &gen7_cs_push_constants,
+   &brw_cs_pull_constants,
+   &brw_cs_ubo_surfaces,
    &brw_cs_abo_surfaces,
+   &brw_cs_texture_surfaces,
+   &brw_cs_work_groups_surface,
+   &brw_cs_samplers,
    &brw_cs_state,
 };
 
@@ -374,7 +408,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       BEGIN_BATCH(3);
       OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
       OUT_BATCH(GEN7_CACHE_MODE_1);
-      OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
+      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
       ADVANCE_BATCH();
    }
@@ -482,9 +516,11 @@ void brw_init_state( struct brw_context *brw )
    ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
    ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
+   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
    ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
+   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
 }
 
 
@@ -568,30 +604,34 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
+   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
    DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
    DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
    DEFINE_BIT(BRW_NEW_URB_FENCE),
    DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
    DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
+   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
    DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
    DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
    DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
    DEFINE_BIT(BRW_NEW_PRIMITIVE),
    DEFINE_BIT(BRW_NEW_CONTEXT),
    DEFINE_BIT(BRW_NEW_PSP),
    DEFINE_BIT(BRW_NEW_SURFACES),
-   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
    DEFINE_BIT(BRW_NEW_INDICES),
    DEFINE_BIT(BRW_NEW_VERTICES),
+   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
    DEFINE_BIT(BRW_NEW_BATCH),
    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
+   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
+   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
    DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
    DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
-   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
    DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
    DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
    DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
@@ -611,6 +651,10 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
    DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
    DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
+   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
+   DEFINE_BIT(BRW_NEW_URB_SIZE),
+   DEFINE_BIT(BRW_NEW_CC_STATE),
+   DEFINE_BIT(BRW_NEW_BLORP),
    {0, 0, 0}
 };
 
@@ -628,24 +672,76 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map)
 {
    for (int i = 0; bit_map[i].bit != 0; i++) {
       if (bit_map[i].count > 1) {
-         fprintf(stderr, "0x%016lx: %12d (%s)\n",
+         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
       }
    }
 }
 
+static inline void
+brw_upload_tess_programs(struct brw_context *brw)
+{
+   if (brw->tess_eval_program) {
+      uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
+      uint32_t per_patch_slots =
+         brw->tess_eval_program->Base.PatchInputsRead;
+
+      /* The TCS may have additional outputs which aren't read by the
+       * TES (possibly for cross-thread communication).  These need to
+       * be stored in the Patch URB Entry as well.
+       */
+      if (brw->tess_ctrl_program) {
+         per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
+         per_patch_slots |=
+            brw->tess_ctrl_program->Base.PatchOutputsWritten;
+      }
+
+      brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
+      brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
+   } else {
+      brw->tcs.prog_data = NULL;
+      brw->tcs.base.prog_data = NULL;
+      brw->tes.prog_data = NULL;
+      brw->tes.base.prog_data = NULL;
+   }
+}
+
 static inline void
 brw_upload_programs(struct brw_context *brw,
                     enum brw_pipeline pipeline)
 {
    if (pipeline == BRW_RENDER_PIPELINE) {
       brw_upload_vs_prog(brw);
+      brw_upload_tess_programs(brw);
 
       if (brw->gen < 6)
          brw_upload_ff_gs_prog(brw);
       else
          brw_upload_gs_prog(brw);
 
+      /* Update the VUE map for data exiting the GS stage of the pipeline.
+       * This comes from the last enabled shader stage.
+       */
+      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
+      bool old_separate = brw->vue_map_geom_out.separate;
+      if (brw->geometry_program)
+         brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+      else if (brw->tess_eval_program)
+         brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
+      else
+         brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
+
+      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
+      if (old_slots != brw->vue_map_geom_out.slots_valid ||
+          old_separate != brw->vue_map_geom_out.separate)
+         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
+
+      if (brw->gen < 6) {
+         brw_setup_vue_interpolation(brw);
+         brw_upload_clip_prog(brw);
+         brw_upload_sf_prog(brw);
+      }
+
       brw_upload_wm_prog(brw);
    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
       brw_upload_cs_prog(brw);
@@ -695,6 +791,16 @@ brw_upload_pipeline_state(struct brw_context *brw,
          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
       }
 
+      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
+         brw->tess_eval_program = ctx->TessEvalProgram._Current;
+         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
+      }
+
+      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
+         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
+         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
+      }
+
       if (brw->geometry_program != ctx->GeometryProgram._Current) {
          brw->geometry_program = ctx->GeometryProgram._Current;
          brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
@@ -733,6 +839,8 @@ brw_upload_pipeline_state(struct brw_context *brw,
    brw_upload_programs(brw, pipeline);
    merge_ctx_state(brw, &state);
 
+   brw_upload_state_base_address(brw);
+
    const struct brw_tracked_state *atoms =
       brw_get_pipeline_atoms(brw, pipeline);
    const int num_atoms = brw->num_atoms[pipeline];
@@ -797,7 +905,7 @@ brw_pipeline_state_finished(struct brw_context *brw,
                             enum brw_pipeline pipeline)
 {
    /* Save all dirty state into the other pipelines */
-   for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
+   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
       if (i != pipeline) {
          brw->state.pipelines[i].mesa |= brw->NewGLState;
          brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;