i965: Support accelerated blit for depth 30 formats. (v2)
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
index 39fc893357113711365fba1bd364611a8c4f71d5..50ac5bc59fff4f9558feba69abafde5dcfa29c9a 100644 (file)
@@ -34,9 +34,6 @@
 #include "main/state.h"
 
 #include "brw_context.h"
-#if GEN_GEN == 6
-#include "brw_defines.h"
-#endif
 #include "brw_draw.h"
 #include "brw_multisample_state.h"
 #include "brw_state.h"
@@ -75,17 +72,6 @@ struct brw_address {
    uint32_t offset;
 };
 
-static uint64_t
-emit_reloc(struct brw_context *brw,
-           void *location, struct brw_address address, uint32_t delta)
-{
-   uint32_t offset = (char *) location - (char *) brw->batch.map;
-
-   return brw_emit_reloc(&brw->batch, offset, address.bo,
-                         address.offset + delta,
-                         address.reloc_flags);
-}
-
 #define __gen_address_type struct brw_address
 #define __gen_user_data struct brw_context
 
@@ -93,10 +79,25 @@ static uint64_t
 __gen_combine_address(struct brw_context *brw, void *location,
                       struct brw_address address, uint32_t delta)
 {
+   struct intel_batchbuffer *batch = &brw->batch;
+   uint32_t offset;
+
    if (address.bo == NULL) {
       return address.offset + delta;
    } else {
-      return emit_reloc(brw, location, address, delta);
+      if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
+         offset = (char *) location - (char *) brw->batch.state.map;
+         return brw_state_reloc(batch, offset, address.bo,
+                                address.offset + delta,
+                                address.reloc_flags);
+      }
+
+      assert(!brw_ptr_in_state_buffer(batch, location));
+
+      offset = (char *) location - (char *) brw->batch.batch.map;
+      return brw_batch_reloc(batch, offset, address.bo,
+                             address.offset + delta,
+                             address.reloc_flags);
    }
 }
 
@@ -431,6 +432,7 @@ upload_format_size(uint32_t upload_format)
 static void
 genX(emit_vertices)(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    uint32_t *dw;
 
    brw_prepare_vertices(brw);
@@ -563,7 +565,7 @@ genX(emit_vertices)(struct brw_context *brw)
           * vertex element may poke over the end of the buffer by 2 bytes.
           */
          const unsigned padding =
-            (GEN_GEN <= 7 && !GEN_IS_HASWELL && !brw->is_baytrail) * 2;
+            (GEN_GEN <= 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail) * 2;
          const unsigned end = buffer->offset + buffer->size + padding;
          dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
                                              buffer->offset,
@@ -1014,6 +1016,9 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
    /* _NEW_POINT */
    const struct gl_point_attrib *point = &ctx->Point;
 
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
+
    /* BRW_NEW_FS_PROG_DATA */
    const struct brw_wm_prog_data *wm_prog_data =
       brw_wm_prog_data(brw->wm.base.prog_data);
@@ -1021,19 +1026,13 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
 
    *point_sprite_enables = 0;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM
-    *
-    * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
-    * the full vertex header.  Otherwise, we can program the SF to start
-    * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
-    * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
-    * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
-    */
-
-   bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
-      (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+   int first_slot =
+      brw_compute_first_urb_slot_required(fp->info.inputs_read,
+                                          &brw->vue_map_geom_out);
 
-   *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+   /* Each URB offset packs two varying slots */
+   assert(first_slot % 2 == 0);
+   *urb_entry_read_offset = first_slot / 2;
 
    /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
     * description of dw10 Point Sprite Texture Coordinate Enable:
@@ -1277,7 +1276,7 @@ genX(upload_clip_state)(struct brw_context *brw)
       clip.GuardbandClipTestEnable = true;
 
       clip.ClipperViewportStatePointer =
-         ro_bo(brw->batch.bo, brw->clip.vp_offset);
+         ro_bo(brw->batch.state.bo, brw->clip.vp_offset);
 
       clip.ScreenSpaceViewportXMin = -1;
       clip.ScreenSpaceViewportXMax = 1;
@@ -1493,7 +1492,8 @@ genX(upload_sf)(struct brw_context *brw)
        * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
        * domain.
        */
-      sf.SetupViewportStateOffset = ro_bo(brw->batch.bo, brw->sf.vp_offset);
+      sf.SetupViewportStateOffset =
+         ro_bo(brw->batch.state.bo, brw->sf.vp_offset);
 
       sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
 
@@ -1590,7 +1590,9 @@ genX(upload_sf)(struct brw_context *brw)
 
       /* _NEW_LINE */
 #if GEN_GEN == 8
-      if (brw->is_cherryview)
+      const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+      if (devinfo->is_cherryview)
          sf.CHVLineWidth = brw_get_line_width(brw);
       else
          sf.LineWidth = brw_get_line_width(brw);
@@ -1621,6 +1623,16 @@ genX(upload_sf)(struct brw_context *brw)
          sf.SmoothPointEnable = true;
 #endif
 
+#if GEN_GEN == 10
+      /* _NEW_BUFFERS
+       * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+       */
+      const bool multisampled_fbo =
+         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+      if (multisampled_fbo)
+         sf.SmoothPointEnable = false;
+#endif
+
 #if GEN_IS_G4X || GEN_GEN >= 5
       sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
 #endif
@@ -1676,7 +1688,8 @@ static const struct brw_tracked_state genX(sf_state) = {
                _NEW_POINT |
                _NEW_PROGRAM |
                (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
-               (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
+               (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) |
+               (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
       .brw   = BRW_NEW_BLORP |
                BRW_NEW_VUE_MAP_GEOM_OUT |
                (GEN_GEN <= 5 ? BRW_NEW_BATCH |
@@ -1705,7 +1718,7 @@ brw_color_buffer_write_enabled(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct gl_program *fp = brw->fragment_program;
+   const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
    unsigned i;
 
    /* _NEW_BUFFERS */
@@ -1784,7 +1797,7 @@ genX(upload_wm)(struct brw_context *brw)
 
       if (stage_state->sampler_count)
          wm.SamplerStatePointer =
-            ro_bo(brw->batch.bo, stage_state->sampler_offset);
+            ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
 #if GEN_GEN == 5
       if (wm_prog_data->prog_offset_2)
          wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
@@ -2077,7 +2090,7 @@ genX(upload_vs_state)(struct brw_context *brw)
 
       vs.StatisticsEnable = false;
       vs.SamplerStatePointer =
-         ro_bo(brw->batch.bo, stage_state->sampler_offset);
+         ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
 #endif
 
 #if GEN_GEN == 5
@@ -2516,8 +2529,9 @@ genX(upload_gs_state)(struct brw_context *brw)
    UNUSED struct gl_context *ctx = &brw->ctx;
    UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
    const struct brw_stage_state *stage_state = &brw->gs.base;
+   const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY];
    /* BRW_NEW_GEOMETRY_PROGRAM */
-   bool active = GEN_GEN >= 6 && brw->geometry_program;
+   bool active = GEN_GEN >= 6 && gs_prog;
 
    /* BRW_NEW_GS_PROG_DATA */
    struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
@@ -2613,7 +2627,7 @@ genX(upload_gs_state)(struct brw_context *brw)
 
 #if GEN_GEN < 7
          gs.SOStatisticsEnable = true;
-         if (brw->geometry_program->info.has_transform_feedback_varyings)
+         if (gs_prog->info.has_transform_feedback_varyings)
             gs.SVBIPayloadEnable = true;
 
          /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
@@ -3018,6 +3032,7 @@ UNUSED static const uint32_t push_constant_opcodes[] = {
 static void
 genX(upload_push_constant_packets)(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct gl_context *ctx = &brw->ctx;
 
    UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
@@ -3030,7 +3045,7 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
       &brw->wm.base,
    };
 
-   if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail &&
+   if (GEN_GEN == 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail &&
        stage_states[MESA_SHADER_VERTEX]->push_constants_dirty)
       gen7_emit_vs_workaround_flush(brw);
 
@@ -3067,7 +3082,7 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
 
                const struct gl_uniform_block *block =
                   prog->sh.UniformBlocks[range->block];
-               const struct gl_uniform_buffer_binding *binding =
+               const struct gl_buffer_binding *binding =
                   &ctx->UniformBufferBindings[block->Binding];
 
                if (binding->BufferObject == ctx->Shared->NullBufferObj) {
@@ -3110,9 +3125,8 @@ genX(upload_push_constant_packets)(struct brw_context *brw)
       }
 
       stage_state->push_constants_dirty = false;
+      brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
    }
-
-   brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
 }
 
 const struct brw_tracked_state genX(push_constant_packets) = {
@@ -3130,13 +3144,12 @@ genX(upload_vs_push_constants)(struct brw_context *brw)
 {
    struct brw_stage_state *stage_state = &brw->vs.base;
 
-   /* _BRW_NEW_VERTEX_PROGRAM */
-   const struct brw_program *vp = brw_program_const(brw->vertex_program);
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
    /* BRW_NEW_VS_PROG_DATA */
    const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
 
-   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
-   gen6_upload_push_constants(brw, &vp->program, prog_data, stage_state);
+   gen6_upload_push_constants(brw, vp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(vs_push_constants) = {
@@ -3157,15 +3170,12 @@ genX(upload_gs_push_constants)(struct brw_context *brw)
    struct brw_stage_state *stage_state = &brw->gs.base;
 
    /* BRW_NEW_GEOMETRY_PROGRAM */
-   const struct brw_program *gp = brw_program_const(brw->geometry_program);
+   const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
 
-   if (gp) {
-      /* BRW_NEW_GS_PROG_DATA */
-      struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
+   /* BRW_NEW_GS_PROG_DATA */
+   struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
 
-      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
-      gen6_upload_push_constants(brw, &gp->program, prog_data, stage_state);
-   }
+   gen6_upload_push_constants(brw, gp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(gs_push_constants) = {
@@ -3185,13 +3195,11 @@ genX(upload_wm_push_constants)(struct brw_context *brw)
 {
    struct brw_stage_state *stage_state = &brw->wm.base;
    /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct brw_program *fp = brw_program_const(brw->fragment_program);
+   const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
    /* BRW_NEW_FS_PROG_DATA */
    const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
 
-   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
-
-   gen6_upload_push_constants(brw, &fp->program, prog_data, stage_state);
+   gen6_upload_push_constants(brw, fp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(wm_push_constants) = {
@@ -3287,7 +3295,8 @@ genX(upload_multisample_state)(struct brw_context *brw)
 
 static const struct brw_tracked_state genX(multisample_state) = {
    .dirty = {
-      .mesa = _NEW_MULTISAMPLE,
+      .mesa = _NEW_MULTISAMPLE |
+              (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
       .brw = BRW_NEW_BLORP |
              BRW_NEW_CONTEXT |
              BRW_NEW_NUM_SAMPLES,
@@ -3320,7 +3329,8 @@ genX(upload_color_calc_state)(struct brw_context *brw)
 
       cc.StatisticsEnable = brw->stats_wm;
 
-      cc.CCViewportStatePointer = ro_bo(brw->batch.bo, brw->cc.vp_offset);
+      cc.CCViewportStatePointer =
+         ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
 #else
       /* _NEW_COLOR */
       cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
@@ -3379,6 +3389,8 @@ static void
 genX(upload_sbe)(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
    /* BRW_NEW_FS_PROG_DATA */
    const struct brw_wm_prog_data *wm_prog_data =
       brw_wm_prog_data(brw->wm.base.prog_data);
@@ -3439,17 +3451,9 @@ genX(upload_sbe)(struct brw_context *brw)
 
 #if GEN_GEN >= 9
       /* prepare the active component dwords */
-      int input_index = 0;
-      for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-         if (!(brw->fragment_program->info.inputs_read &
-               BITFIELD64_BIT(attr))) {
-            continue;
-         }
-
-         assert(input_index < 32);
-
+      const int num_inputs = urb_entry_read_length * 2;
+      for (int input_index = 0; input_index < num_inputs; input_index++) {
          sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
-         ++input_index;
       }
 #endif
    }
@@ -3865,7 +3869,6 @@ genX(upload_ps)(struct brw_context *brw)
       else
          ps.PositionXYOffsetSelect = POSOFFSET_NONE;
 
-      ps.RenderTargetFastClearEnable = brw->wm.fast_clear_op;
       ps._8PixelDispatchEnable = prog_data->dispatch_8;
       ps._16PixelDispatchEnable = prog_data->dispatch_16;
       ps.DispatchGRFStartRegisterForConstantSetupData0 =
@@ -3990,7 +3993,7 @@ static void
 upload_te_state(struct brw_context *brw)
 {
    /* BRW_NEW_TESS_PROGRAMS */
-   bool active = brw->tess_eval_program;
+   bool active = brw->programs[MESA_SHADER_TESS_EVAL];
 
    /* BRW_NEW_TES_PROG_DATA */
    const struct brw_tes_prog_data *tes_prog_data =
@@ -4028,14 +4031,11 @@ genX(upload_tes_push_constants)(struct brw_context *brw)
 {
    struct brw_stage_state *stage_state = &brw->tes.base;
    /* BRW_NEW_TESS_PROGRAMS */
-   const struct brw_program *tep = brw_program_const(brw->tess_eval_program);
+   const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
 
-   if (tep) {
-      /* BRW_NEW_TES_PROG_DATA */
-      const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
-      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
-      gen6_upload_push_constants(brw, &tep->program, prog_data, stage_state);
-   }
+   /* BRW_NEW_TES_PROG_DATA */
+   const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
+   gen6_upload_push_constants(brw, tep, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(tes_push_constants) = {
@@ -4054,16 +4054,12 @@ genX(upload_tcs_push_constants)(struct brw_context *brw)
 {
    struct brw_stage_state *stage_state = &brw->tcs.base;
    /* BRW_NEW_TESS_PROGRAMS */
-   const struct brw_program *tcp = brw_program_const(brw->tess_ctrl_program);
-   bool active = brw->tess_eval_program;
+   const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
 
-   if (active) {
-      /* BRW_NEW_TCS_PROG_DATA */
-      const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
+   /* BRW_NEW_TCS_PROG_DATA */
+   const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
 
-      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
-      gen6_upload_push_constants(brw, &tcp->program, prog_data, stage_state);
-   }
+   gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
 }
 
 static const struct brw_tracked_state genX(tcs_push_constants) = {
@@ -4083,6 +4079,68 @@ static const struct brw_tracked_state genX(tcs_push_constants) = {
 /* ---------------------------------------------------------------------- */
 
 #if GEN_GEN >= 7
+static void
+genX(upload_cs_push_constants)(struct brw_context *brw)
+{
+   struct brw_stage_state *stage_state = &brw->cs.base;
+
+   /* BRW_NEW_COMPUTE_PROGRAM -- may be NULL; checked before any upload. */
+   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];
+
+   if (cp) { /* without a compute program there is nothing to upload */
+      /* BRW_NEW_CS_PROG_DATA -- CS-typed prog_data of the compute stage. */
+      struct brw_cs_prog_data *cs_prog_data =
+         brw_cs_prog_data(brw->cs.base.prog_data);
+
+      _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+      brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state);
+   }
+}
+
+const struct brw_tracked_state genX(cs_push_constants) = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
+             BRW_NEW_COMPUTE_PROGRAM |
+             BRW_NEW_CS_PROG_DATA,
+   },
+   .emit = genX(upload_cs_push_constants),
+};
+
+/**
+ * Creates a new CS constant buffer reflecting the current CS program's
+ * constants, if needed by the CS program (via brw_upload_pull_constants()).
+ */
+static void
+genX(upload_cs_pull_constants)(struct brw_context *brw)
+{
+   struct brw_stage_state *stage_state = &brw->cs.base;
+
+   /* BRW_NEW_COMPUTE_PROGRAM -- NOTE(review): not NULL-checked; confirm. */
+   struct brw_program *cp =
+      (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
+
+   /* BRW_NEW_CS_PROG_DATA */
+   const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data;
+
+   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+   /* _NEW_PROGRAM_CONSTANTS */
+   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program,
+                             stage_state, prog_data);
+}
+
+const struct brw_tracked_state genX(cs_pull_constants) = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
+             BRW_NEW_COMPUTE_PROGRAM |
+             BRW_NEW_CS_PROG_DATA,
+   },
+   .emit = genX(upload_cs_pull_constants),
+};
+
 static void
 genX(upload_cs_state)(struct brw_context *brw)
 {
@@ -4111,30 +4169,49 @@ genX(upload_cs_state)(struct brw_context *brw)
    uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
                                     32, &stage_state->bind_bo_offset);
 
+   /* The MEDIA_VFE_STATE documentation for Gen8+ says:
+    *
+    * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+    *  the only bits that are changed are scoreboard related: Scoreboard
+    *  Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+    *  these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
+    *
+    * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
+    * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+
    brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
       if (prog_data->total_scratch) {
-         uint32_t bo_offset;
+         uint32_t per_thread_scratch_value;
 
          if (GEN_GEN >= 8) {
             /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
              * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
              */
-            bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
          } else if (GEN_IS_HASWELL) {
             /* Haswell's Per Thread Scratch Space is in the range [0, 10]
              * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
              */
-            bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+            per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
          } else {
             /* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
              * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
              */
-            bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+            per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
          }
-         vfe.ScratchSpaceBasePointer =
-            rw_bo(stage_state->scratch_bo, bo_offset);
+         vfe.ScratchSpaceBasePointer = rw_bo(stage_state->scratch_bo, 0);
+         vfe.PerThreadScratchSpace = per_thread_scratch_value;
       }
 
+      /* If brw->screen->subslice_total is greater than one, then
+       * devinfo->max_cs_threads stores the number of threads per sub-slice;
+       * thus we need to multiply that number by subslices to get
+       * the actual maximum number of threads; the -1 is because the HW
+       * has a bias of 1 (would not make sense to say the maximum number
+       * of threads is 0).
+       */
       const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
       vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
       vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
@@ -4187,7 +4264,7 @@ genX(upload_cs_state)(struct brw_context *brw)
    const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
       .KernelStartPointer = brw->cs.base.prog_offset,
       .SamplerStatePointer = stage_state->sampler_offset,
-      .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
+      .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
       .BindingTablePointer = stage_state->bind_bo_offset,
       .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
       .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
@@ -4229,16 +4306,16 @@ static const struct brw_tracked_state genX(cs_state) = {
 static void
 genX(upload_raster)(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->ctx;
+   const struct gl_context *ctx = &brw->ctx;
 
    /* _NEW_BUFFERS */
-   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
    /* _NEW_POLYGON */
-   struct gl_polygon_attrib *polygon = &ctx->Polygon;
+   const struct gl_polygon_attrib *polygon = &ctx->Polygon;
 
    /* _NEW_POINT */
-   struct gl_point_attrib *point = &ctx->Point;
+   const struct gl_point_attrib *point = &ctx->Point;
 
    brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
       if (brw->polygon_front_bit == render_to_fbo)
@@ -4262,7 +4339,7 @@ genX(upload_raster)(struct brw_context *brw)
          raster.CullMode = CULLMODE_NONE;
       }
 
-      point->SmoothFlag = raster.SmoothPointEnable;
+      raster.SmoothPointEnable = point->SmoothFlag;
 
       raster.DXMultisampleRasterizationEnable =
          _mesa_is_multisample_enabled(ctx);
@@ -4302,6 +4379,16 @@ genX(upload_raster)(struct brw_context *brw)
       /* _NEW_LINE */
       raster.AntialiasingEnable = ctx->Line.SmoothFlag;
 
+#if GEN_GEN == 10
+      /* _NEW_BUFFERS
+       * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+       */
+      const bool multisampled_fbo =
+         _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+      if (multisampled_fbo)
+         raster.AntialiasingEnable = false;
+#endif
+
       /* _NEW_SCISSOR */
       raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
 
@@ -5011,13 +5098,12 @@ genX(update_sampler_state)(struct brw_context *brw,
                                  texObj->StencilSampling,
                                  &border_color_offset);
    }
-   if (GEN_GEN < 6) {
+#if GEN_GEN < 6
       samp_st.BorderColorPointer =
-         brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
-                        brw->batch.bo, border_color_offset, 0);
-   } else {
+         ro_bo(brw->batch.state.bo, border_color_offset);
+#else
       samp_st.BorderColorPointer = border_color_offset;
-   }
+#endif
 
 #if GEN_GEN >= 8
    samp_st.LODPreClampMode = CLAMP_MODE_OGL;
@@ -5104,7 +5190,7 @@ static void
 genX(upload_fs_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_FRAGMENT_PROGRAM */
-   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];
    genX(upload_sampler_state_table)(brw, fs, &brw->wm.base);
 }
 
@@ -5122,7 +5208,7 @@ static void
 genX(upload_vs_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_VERTEX_PROGRAM */
-   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];
    genX(upload_sampler_state_table)(brw, vs, &brw->vs.base);
 }
 
@@ -5141,7 +5227,7 @@ static void
 genX(upload_gs_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_GEOMETRY_PROGRAM */
-   struct gl_program *gs = (struct gl_program *) brw->geometry_program;
+   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];
    if (!gs)
       return;
 
@@ -5165,7 +5251,7 @@ static void
 genX(upload_tcs_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_TESS_PROGRAMS */
-   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
+   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
    if (!tcs)
       return;
 
@@ -5188,7 +5274,7 @@ static void
 genX(upload_tes_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_TESS_PROGRAMS */
-   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
+   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];
    if (!tes)
       return;
 
@@ -5211,7 +5297,7 @@ static void
 genX(upload_cs_samplers)(struct brw_context *brw)
 {
    /* BRW_NEW_COMPUTE_PROGRAM */
-   struct gl_program *cs = (struct gl_program *) brw->compute_program;
+   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];
    if (!cs)
       return;
 
@@ -5297,8 +5383,6 @@ genX(init_atoms)(struct brw_context *brw)
 
       /* Command packets:
        */
-      &brw_invariant_state,
-
       &brw_binding_table_pointers,
       &genX(blend_constant_color),
 
@@ -5414,19 +5498,14 @@ genX(init_atoms)(struct brw_context *brw)
        */
       &brw_vs_pull_constants,
       &brw_vs_ubo_surfaces,
-      &brw_vs_abo_surfaces,
       &brw_tcs_pull_constants,
       &brw_tcs_ubo_surfaces,
-      &brw_tcs_abo_surfaces,
       &brw_tes_pull_constants,
       &brw_tes_ubo_surfaces,
-      &brw_tes_abo_surfaces,
       &brw_gs_pull_constants,
       &brw_gs_ubo_surfaces,
-      &brw_gs_abo_surfaces,
       &brw_wm_pull_constants,
       &brw_wm_ubo_surfaces,
-      &brw_wm_abo_surfaces,
       &gen6_renderbuffer_surfaces,
       &brw_renderbuffer_read_surfaces,
       &brw_texture_surfaces,
@@ -5506,19 +5585,14 @@ genX(init_atoms)(struct brw_context *brw)
        */
       &brw_vs_pull_constants,
       &brw_vs_ubo_surfaces,
-      &brw_vs_abo_surfaces,
       &brw_tcs_pull_constants,
       &brw_tcs_ubo_surfaces,
-      &brw_tcs_abo_surfaces,
       &brw_tes_pull_constants,
       &brw_tes_ubo_surfaces,
-      &brw_tes_abo_surfaces,
       &brw_gs_pull_constants,
       &brw_gs_ubo_surfaces,
-      &brw_gs_abo_surfaces,
       &brw_wm_pull_constants,
       &brw_wm_ubo_surfaces,
-      &brw_wm_abo_surfaces,
       &gen6_renderbuffer_surfaces,
       &brw_renderbuffer_read_surfaces,
       &brw_texture_surfaces,
@@ -5585,10 +5659,9 @@ genX(init_atoms)(struct brw_context *brw)
    {
       &gen7_l3_state,
       &brw_cs_image_surfaces,
-      &gen7_cs_push_constants,
-      &brw_cs_pull_constants,
+      &genX(cs_push_constants),
+      &genX(cs_pull_constants),
       &brw_cs_ubo_surfaces,
-      &brw_cs_abo_surfaces,
       &brw_cs_texture_surfaces,
       &brw_cs_work_groups_surface,
       &genX(cs_samplers),