i965: Support accelerated blit for depth 30 formats. (v2)
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
index f2b3e4570b2bda02d9d92ca39ddc6cbdc8c21ded..50ac5bc59fff4f9558feba69abafde5dcfa29c9a 100644 (file)
@@ -34,9 +34,6 @@
 #include "main/state.h"
 
 #include "brw_context.h"
-#if GEN_GEN == 6
-#include "brw_defines.h"
-#endif
 #include "brw_draw.h"
 #include "brw_multisample_state.h"
 #include "brw_state.h"
@@ -89,7 +86,7 @@ __gen_combine_address(struct brw_context *brw, void *location,
       return address.offset + delta;
    } else {
       if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
-         offset = (char *) location - (char *) brw->batch.state_map;
+         offset = (char *) location - (char *) brw->batch.state.map;
          return brw_state_reloc(batch, offset, address.bo,
                                 address.offset + delta,
                                 address.reloc_flags);
@@ -97,7 +94,7 @@ __gen_combine_address(struct brw_context *brw, void *location,
 
       assert(!brw_ptr_in_state_buffer(batch, location));
 
-      offset = (char *) location - (char *) brw->batch.map;
+      offset = (char *) location - (char *) brw->batch.batch.map;
       return brw_batch_reloc(batch, offset, address.bo,
                              address.offset + delta,
                              address.reloc_flags);
@@ -1279,7 +1276,7 @@ genX(upload_clip_state)(struct brw_context *brw)
       clip.GuardbandClipTestEnable = true;
 
       clip.ClipperViewportStatePointer =
-         ro_bo(brw->batch.state_bo, brw->clip.vp_offset);
+         ro_bo(brw->batch.state.bo, brw->clip.vp_offset);
 
       clip.ScreenSpaceViewportXMin = -1;
       clip.ScreenSpaceViewportXMax = 1;
@@ -1496,7 +1493,7 @@ genX(upload_sf)(struct brw_context *brw)
        * domain.
        */
       sf.SetupViewportStateOffset =
-         ro_bo(brw->batch.state_bo, brw->sf.vp_offset);
+         ro_bo(brw->batch.state.bo, brw->sf.vp_offset);
 
       sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
 
@@ -1626,6 +1623,16 @@ genX(upload_sf)(struct brw_context *brw)
          sf.SmoothPointEnable = true;
 #endif
 
+#if GEN_GEN == 10
+      /* _NEW_BUFFERS
+       * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+       */
+      const bool multisampled_fbo =
+         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+      if (multisampled_fbo)
+         sf.SmoothPointEnable = false;
+#endif
+
 #if GEN_IS_G4X || GEN_GEN >= 5
       sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
 #endif
@@ -1681,7 +1688,8 @@ static const struct brw_tracked_state genX(sf_state) = {
                _NEW_POINT |
                _NEW_PROGRAM |
                (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
-               (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
+               (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) |
+               (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
       .brw   = BRW_NEW_BLORP |
                BRW_NEW_VUE_MAP_GEOM_OUT |
                (GEN_GEN <= 5 ? BRW_NEW_BATCH |
@@ -1789,7 +1797,7 @@ genX(upload_wm)(struct brw_context *brw)
 
       if (stage_state->sampler_count)
          wm.SamplerStatePointer =
-            ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
+            ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
 #if GEN_GEN == 5
       if (wm_prog_data->prog_offset_2)
          wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
@@ -2082,7 +2090,7 @@ genX(upload_vs_state)(struct brw_context *brw)
 
       vs.StatisticsEnable = false;
       vs.SamplerStatePointer =
-         ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
+         ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
 #endif
 
 #if GEN_GEN == 5
@@ -3117,9 +3125,8 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
       }
 
       stage_state->push_constants_dirty = false;
+      brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
    }
-
-   brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
 }
 
 const struct brw_tracked_state genX(push_constant_packets) = {
@@ -3165,12 +3172,10 @@ genX(upload_gs_push_constants)(struct brw_context *brw)
    /* BRW_NEW_GEOMETRY_PROGRAM */
    const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
 
-   if (gp) {
-      /* BRW_NEW_GS_PROG_DATA */
-      struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
+   /* BRW_NEW_GS_PROG_DATA */
+   struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
 
-      gen6_upload_push_constants(brw, gp, prog_data, stage_state);
-   }
+   gen6_upload_push_constants(brw, gp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(gs_push_constants) = {
@@ -3290,7 +3295,8 @@ genX(upload_multisample_state)(struct brw_context *brw)
 
 static const struct brw_tracked_state genX(multisample_state) = {
    .dirty = {
-      .mesa = _NEW_MULTISAMPLE,
+      .mesa = _NEW_MULTISAMPLE |
+              (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
       .brw = BRW_NEW_BLORP |
              BRW_NEW_CONTEXT |
              BRW_NEW_NUM_SAMPLES,
@@ -3324,7 +3330,7 @@ genX(upload_color_calc_state)(struct brw_context *brw)
       cc.StatisticsEnable = brw->stats_wm;
 
       cc.CCViewportStatePointer =
-         ro_bo(brw->batch.state_bo, brw->cc.vp_offset);
+         ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
 #else
       /* _NEW_COLOR */
       cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
@@ -4027,11 +4033,9 @@ genX(upload_tes_push_constants)(struct brw_context *brw)
    /* BRW_NEW_TESS_PROGRAMS */
    const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
 
-   if (tep) {
-      /* BRW_NEW_TES_PROG_DATA */
-      const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
-      gen6_upload_push_constants(brw, tep, prog_data, stage_state);
-   }
+   /* BRW_NEW_TES_PROG_DATA */
+   const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
+   gen6_upload_push_constants(brw, tep, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(tes_push_constants) = {
@@ -4051,14 +4055,11 @@ genX(upload_tcs_push_constants)(struct brw_context *brw)
    struct brw_stage_state *stage_state = &brw->tcs.base;
    /* BRW_NEW_TESS_PROGRAMS */
    const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
-   bool active = brw->programs[MESA_SHADER_TESS_EVAL];
 
-   if (active) {
-      /* BRW_NEW_TCS_PROG_DATA */
-      const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
+   /* BRW_NEW_TCS_PROG_DATA */
+   const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
 
-      gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
-   }
+   gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(tcs_push_constants) = {
@@ -4168,30 +4169,49 @@ genX(upload_cs_state)(struct brw_context *brw)
    uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
                                     32, &stage_state->bind_bo_offset);
 
+   /* The MEDIA_VFE_STATE documentation for Gen8+ says:
+    *
+    * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+    *  the only bits that are changed are scoreboard related: Scoreboard
+    *  Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+    *  these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
+    *
+    * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
+    * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+
    brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
       if (prog_data->total_scratch) {
-         uint32_t bo_offset;
+         uint32_t per_thread_scratch_value;
 
          if (GEN_GEN >= 8) {
             /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
              * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
              */
-            bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
          } else if (GEN_IS_HASWELL) {
             /* Haswell's Per Thread Scratch Space is in the range [0, 10]
              * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
              */
-            bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
          } else {
             /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
              * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
              */
-            bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+            per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
          }
-         vfe.ScratchSpaceBasePointer =
-            rw_bo(stage_state->scratch_bo, bo_offset);
+         vfe.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
+         vfe.PerThreadScratchSpace = per_thread_scratch_value;
       }
 
+      /* If brw->screen->subslice_total is greater than one, then
+       * devinfo->max_cs_threads stores number of threads per sub-slice;
+       * thus we need to multiply by that number by subslices to get
+       * the actual maximum number of threads; the -1 is because the HW
+       * has a bias of 1 (would not make sense to say the maximum number
+       * of threads is 0).
+       */
       const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
       vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
       vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
@@ -4244,7 +4264,7 @@ genX(upload_cs_state)(struct brw_context *brw)
    const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
       .KernelStartPointer = brw->cs.base.prog_offset,
       .SamplerStatePointer = stage_state->sampler_offset,
-      .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
+      .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
       .BindingTablePointer = stage_state->bind_bo_offset,
       .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
       .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
@@ -4286,16 +4306,16 @@ static const struct brw_tracked_state genX(cs_state) = {
 static void
 genX(upload_raster)(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->ctx;
+   const struct gl_context *ctx = &brw->ctx;
 
    /* _NEW_BUFFERS */
-   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
    /* _NEW_POLYGON */
-   struct gl_polygon_attrib *polygon = &ctx->Polygon;
+   const struct gl_polygon_attrib *polygon = &ctx->Polygon;
 
    /* _NEW_POINT */
-   struct gl_point_attrib *point = &ctx->Point;
+   const struct gl_point_attrib *point = &ctx->Point;
 
    brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
       if (brw->polygon_front_bit == render_to_fbo)
@@ -4319,7 +4339,7 @@ genX(upload_raster)(struct brw_context *brw)
          raster.CullMode = CULLMODE_NONE;
       }
 
-      point->SmoothFlag = raster.SmoothPointEnable;
+      raster.SmoothPointEnable = point->SmoothFlag;
 
       raster.DXMultisampleRasterizationEnable =
          _mesa_is_multisample_enabled(ctx);
@@ -4359,6 +4379,16 @@ genX(upload_raster)(struct brw_context *brw)
       /* _NEW_LINE */
       raster.AntialiasingEnable = ctx->Line.SmoothFlag;
 
+#if GEN_GEN == 10
+      /* _NEW_BUFFERS
+       * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+       */
+      const bool multisampled_fbo =
+         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+      if (multisampled_fbo)
+         raster.AntialiasingEnable = false;
+#endif
+
       /* _NEW_SCISSOR */
       raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
 
@@ -5070,7 +5100,7 @@ genX(update_sampler_state)(struct brw_context *brw,
    }
 #if GEN_GEN < 6
       samp_st.BorderColorPointer =
-         ro_bo(brw->batch.state_bo, border_color_offset);
+         ro_bo(brw->batch.state.bo, border_color_offset);
 #else
       samp_st.BorderColorPointer = border_color_offset;
 #endif
@@ -5353,8 +5383,6 @@ genX(init_atoms)(struct brw_context *brw)
 
       /* Command packets:
        */
-      &brw_invariant_state,
-
       &brw_binding_table_pointers,
       &genX(blend_constant_color),
 
@@ -5470,19 +5498,14 @@ genX(init_atoms)(struct brw_context *brw)
        */
       &brw_vs_pull_constants,
       &brw_vs_ubo_surfaces,
-      &brw_vs_abo_surfaces,
       &brw_tcs_pull_constants,
       &brw_tcs_ubo_surfaces,
-      &brw_tcs_abo_surfaces,
       &brw_tes_pull_constants,
       &brw_tes_ubo_surfaces,
-      &brw_tes_abo_surfaces,
       &brw_gs_pull_constants,
       &brw_gs_ubo_surfaces,
-      &brw_gs_abo_surfaces,
       &brw_wm_pull_constants,
       &brw_wm_ubo_surfaces,
-      &brw_wm_abo_surfaces,
       &gen6_renderbuffer_surfaces,
       &brw_renderbuffer_read_surfaces,
       &brw_texture_surfaces,
@@ -5562,19 +5585,14 @@ genX(init_atoms)(struct brw_context *brw)
        */
       &brw_vs_pull_constants,
       &brw_vs_ubo_surfaces,
-      &brw_vs_abo_surfaces,
       &brw_tcs_pull_constants,
       &brw_tcs_ubo_surfaces,
-      &brw_tcs_abo_surfaces,
       &brw_tes_pull_constants,
       &brw_tes_ubo_surfaces,
-      &brw_tes_abo_surfaces,
       &brw_gs_pull_constants,
       &brw_gs_ubo_surfaces,
-      &brw_gs_abo_surfaces,
       &brw_wm_pull_constants,
       &brw_wm_ubo_surfaces,
-      &brw_wm_abo_surfaces,
       &gen6_renderbuffer_surfaces,
       &brw_renderbuffer_read_surfaces,
       &brw_texture_surfaces,
@@ -5644,7 +5662,6 @@ genX(init_atoms)(struct brw_context *brw)
       &genX(cs_push_constants),
       &genX(cs_pull_constants),
       &brw_cs_ubo_surfaces,
-      &brw_cs_abo_surfaces,
       &brw_cs_texture_surfaces,
       &brw_cs_work_groups_surface,
       &genX(cs_samplers),