i965: Convert SF_STATE to genxml.

[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c

index a2ed2e72710989b3966778d9fbb209132d35fef2..a5a9d51bde4644892aa1b5e2c6539576b6129ef7 100644 (file)
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -154,6 +154,29 @@ vertex_bo(struct brw_bo *bo, uint32_t offset)
     };
  }
  
+#if GEN_GEN == 4  /* KSP helpers return a brw_address built on brw->cache.bo */
+static inline struct brw_address
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return instruction_bo(brw->cache.bo, offset);
+}
+
+static inline struct brw_address
+KSP_ro(struct brw_context *brw, uint32_t offset)
+{
+   return instruction_ro_bo(brw->cache.bo, offset); /* presumably read-only */
+}
+#else  /* GEN_GEN != 4: KSP is the bare offset */
+static inline uint32_t
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return offset; /* brw is unused here; kept so both variants share a signature */
+}
+
+#define KSP_ro KSP  /* no separate _ro variant in this branch; alias of KSP */
+
+#endif /* GEN_GEN == 4 */
+
  #include "genxml/genX_pack.h"
  
  #define _brw_cmd_length(cmd) cmd ## _length
@@ -344,7 +367,9 @@ genX(emit_vertex_buffer_state)(struct brw_context *brw,
  #endif
  #endif
  
-#if GEN_GEN == 9
+#if GEN_GEN == 10
+      .VertexBufferMOCS = CNL_MOCS_WB,
+#elif GEN_GEN == 9
        .VertexBufferMOCS = SKL_MOCS_WB,
  #elif GEN_GEN == 8
        .VertexBufferMOCS = BDW_MOCS_WB,
@@ -1316,7 +1341,7 @@ genX(upload_clip_state)(struct brw_context *brw)
           clip.ClipMode = CLIPMODE_NORMAL;
        }
  
-      clip.ClipEnable = brw->primitive != _3DPRIM_RECTLIST;
+      clip.ClipEnable = true;
  
        /* _NEW_POLYGON,
         * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
@@ -1355,7 +1380,6 @@ static const struct brw_tracked_state genX(clip_state) = {
  
  /* ---------------------------------------------------------------------- */
  
-#if GEN_GEN >= 6
  static void
  genX(upload_sf)(struct brw_context *brw)
  {
@@ -1365,11 +1389,48 @@ genX(upload_sf)(struct brw_context *brw)
  #if GEN_GEN <= 7
     /* _NEW_BUFFERS */
     bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+   UNUSED const bool multisampled_fbo =
+      _mesa_geometric_samples(ctx->DrawBuffer) > 1;
  #endif
  
+#if GEN_GEN < 6
+   const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data;
+
+   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+
+   brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
+      sf.KernelStartPointer = KSP_ro(brw, brw->sf.prog_offset);
+      sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+      sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
+      sf.DispatchGRFStartRegisterForURBData = 3;
+      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
+      sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
+      sf.NumberofURBEntries = brw->urb.nr_sf_entries;
+      sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
+
+      /* STATE_PREFETCH command description describes this state as being
+       * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
+       * domain.
+       */
+      sf.SetupViewportStateOffset =
+         instruction_ro_bo(brw->batch.bo, brw->sf.vp_offset);
+
+      sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+
+      /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */
+      /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */
+
+      sf.MaximumNumberofThreads =
+         MIN2(GEN_GEN == 5 ? 48 : 24, brw->urb.nr_sf_entries) - 1;
+
+      sf.SpritePointEnable = ctx->Point.PointSprite;
+
+      sf.DestinationOriginHorizontalBias = 0.5;
+      sf.DestinationOriginVerticalBias = 0.5;
+#else
     brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
        sf.StatisticsEnable = true;
+#endif
        sf.ViewportTransformEnable = true;
  
  #if GEN_GEN == 7
@@ -1380,6 +1441,7 @@ genX(upload_sf)(struct brw_context *brw)
  #if GEN_GEN <= 7
        /* _NEW_POLYGON */
        sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
+#if GEN_GEN >= 6
        sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
        sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
        sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
@@ -1412,6 +1474,14 @@ genX(upload_sf)(struct brw_context *brw)
              unreachable("not reached");
        }
  
+      if (multisampled_fbo && ctx->Multisample.Enabled)
+         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
+
+      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+#endif
+
        sf.ScissorRectangleEnable = true;
  
        if (ctx->Polygon.CullFlag) {
@@ -1436,12 +1506,6 @@ genX(upload_sf)(struct brw_context *brw)
        sf.LineStippleEnable = ctx->Line.StippleFlag;
  #endif
  
-      if (multisampled_fbo && ctx->Multisample.Enabled)
-         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
-
-      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
-      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
-      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
  #endif
  
        /* _NEW_LINE */
@@ -1477,7 +1541,9 @@ genX(upload_sf)(struct brw_context *brw)
           sf.SmoothPointEnable = true;
  #endif
  
+#if GEN_IS_G4X || GEN_GEN >= 5
        sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
+#endif
  
        /* _NEW_LIGHT */
        if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
@@ -1527,14 +1593,21 @@ static const struct brw_tracked_state genX(sf_state) = {
     .dirty = {
        .mesa  = _NEW_LIGHT |
                 _NEW_LINE |
-               _NEW_MULTISAMPLE |
                 _NEW_POINT |
                 _NEW_PROGRAM |
+               (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
                 (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
        .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
                 BRW_NEW_VUE_MAP_GEOM_OUT |
-               (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
+               (GEN_GEN <= 5 ? BRW_NEW_BATCH |
+                               BRW_NEW_PROGRAM_CACHE |
+                               BRW_NEW_SF_PROG_DATA |
+                               BRW_NEW_SF_VP |
+                               BRW_NEW_URB_FENCE
+                             : 0) |
+               (GEN_GEN >= 6 ? BRW_NEW_CONTEXT : 0) |
+               (GEN_GEN >= 6 && GEN_GEN <= 7 ?
+                               BRW_NEW_GS_PROG_DATA |
                                 BRW_NEW_PRIMITIVE |
                                 BRW_NEW_TES_PROG_DATA
                               : 0) |
@@ -1544,7 +1617,6 @@ static const struct brw_tracked_state genX(sf_state) = {
     },
     .emit = genX(upload_sf),
  };
-#endif
  
  /* ---------------------------------------------------------------------- */
  
@@ -1729,20 +1801,6 @@ static const struct brw_tracked_state genX(wm_state) = {
  
  /* ---------------------------------------------------------------------- */
  
-#if GEN_GEN == 4
-static inline struct brw_address
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return instruction_bo(brw->cache.bo, offset);
-}
-#else
-static inline uint32_t
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return offset;
-}
-#endif
-
  #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
     pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset);           \
     pkt.SamplerCount       =                                               \
@@ -2435,6 +2493,20 @@ static const struct brw_tracked_state genX(gs_state) = {
  
  /* ---------------------------------------------------------------------- */
  
+UNUSED static GLenum
+fix_dual_blend_alpha_to_one(GLenum function)
+{
+   switch (function) {
+   case GL_SRC1_ALPHA:
+      return GL_ONE;
+   case GL_ONE_MINUS_SRC1_ALPHA:
+      return GL_ZERO;
+   default:
+      /* Any other blend factor is handed back unchanged. */
+      return function;
+   }
+}
+
  #define blend_factor(x) brw_translate_blend_factor(x)
  #define blend_eqn(x) brw_translate_blend_equation(x)
  
@@ -2562,6 +2634,19 @@ genX(upload_blend_state)(struct brw_context *brw)
                 dstA = brw_fix_xRGB_alpha(dstA);
              }
  
+            /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+             * "If Dual Source Blending is enabled, this bit must be disabled."
+             *
+             * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
+             * and leave it enabled anyway.
+             */
+            if (ctx->Color.Blend[i]._UsesDualSrc && blend.AlphaToOneEnable) {
+               srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+               srcA = fix_dual_blend_alpha_to_one(srcA);
+               dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+               dstA = fix_dual_blend_alpha_to_one(dstA);
+            }
+
              entry.ColorBufferBlendEnable = true;
              entry.DestinationBlendFactor = blend_factor(dstRGB);
              entry.SourceBlendFactor = blend_factor(srcRGB);
@@ -2600,16 +2685,6 @@ genX(upload_blend_state)(struct brw_context *brw)
           entry.WriteDisableBlue  = !ctx->Color.ColorMask[i][2];
           entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];
  
-         /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
-          * "If Dual Source Blending is enabled, this bit must be disabled."
-          */
-         WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
-                   _mesa_is_multisample_enabled(ctx) &&
-                   ctx->Multisample.SampleAlphaToOne,
-                   "HW workaround: disabling alpha to one with dual src "
-                   "blending\n");
-         if (ctx->Color.Blend[i]._UsesDualSrc)
-            blend.AlphaToOneEnable = false;
  #if GEN_GEN >= 8
           GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
  #else
@@ -2674,14 +2749,12 @@ upload_constant_state(struct brw_context *brw,
        pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
        if (active) {
  #if GEN_GEN >= 8 || GEN_IS_HASWELL
-         pkt.ConstantBody.ConstantBuffer2ReadLength =
-            stage_state->push_const_size;
-         pkt.ConstantBody.PointerToConstantBuffer2 =
+         pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
+         pkt.ConstantBody.Buffer[2] =
              render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
  #else
-         pkt.ConstantBody.ConstantBuffer0ReadLength =
-            stage_state->push_const_size;
-         pkt.ConstantBody.PointerToConstantBuffer0.offset =
+         pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
+         pkt.ConstantBody.Buffer[0].offset =
              stage_state->push_const_offset | mocs;
  #endif
        }
@@ -3078,25 +3151,17 @@ genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
      * command feels strange -- each dword pair contains a SO_DECL per stream.
      */
     for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
-      int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
-      struct GENX(SO_DECL) decl = {0};
-      int varying = linked_xfb_info->Outputs[i].OutputRegister;
-      const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
-      unsigned component_mask = (1 << components) - 1;
-      unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
-      unsigned decl_buffer_slot = buffer;
+      const struct gl_transform_feedback_output *output =
+         &linked_xfb_info->Outputs[i];
+      const int buffer = output->OutputBuffer;
+      const int varying = output->OutputRegister;
+      const unsigned stream_id = output->StreamId;
        assert(stream_id < MAX_VERTEX_STREAMS);
  
-      component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
-
        buffer_mask[stream_id] |= 1 << buffer;
  
        assert(vue_map->varying_to_slot[varying] >= 0);
  
-      decl.OutputBufferSlot = decl_buffer_slot;
-      decl.RegisterIndex = vue_map->varying_to_slot[varying];
-      decl.ComponentMask = component_mask;
-
        /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
         * array.  Instead, it simply increments DstOffset for the following
         * input by the number of components that should be skipped.
@@ -3107,31 +3172,25 @@ genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
         * program as many size = 4 holes as we can, then a final hole to
         * accommodate the final 1, 2, or 3 remaining.
         */
-      int skip_components =
-         linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
+      int skip_components = output->DstOffset - next_offset[buffer];
  
-      next_offset[buffer] += skip_components;
-
-      while (skip_components >= 4) {
-         struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
-         d->HoleFlag = 1;
-         d->OutputBufferSlot = decl_buffer_slot;
-         d->ComponentMask = 0xf;
+      while (skip_components > 0) {
+         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+            .HoleFlag = 1,
+            .OutputBufferSlot = output->OutputBuffer,
+            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
+         };
           skip_components -= 4;
        }
  
-      if (skip_components > 0) {
-         struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
-         d->HoleFlag = 1;
-         d->OutputBufferSlot = decl_buffer_slot;
-         d->ComponentMask = (1 << skip_components) - 1;
-      }
+      next_offset[buffer] = output->DstOffset + output->NumComponents;
  
-      assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
-
-      next_offset[buffer] += components;
-
-      so_decl[stream_id][decls[stream_id]++] = decl;
+      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+         .OutputBufferSlot = output->OutputBuffer,
+         .RegisterIndex = vue_map->varying_to_slot[varying],
+         .ComponentMask =
+            ((1 << output->NumComponents) - 1) << output->ComponentOffset,
+      };
  
        if (decls[stream_id] > max_decls)
           max_decls = decls[stream_id];
@@ -3715,7 +3774,7 @@ genX(upload_cs_state)(struct brw_context *brw)
  
        const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
        vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
-      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;;
+      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
        vfe.ResetGatewayTimer =
           Resettingrelativetimerandlatchingtheglobaltimestamp;
  #if GEN_GEN < 9
@@ -4031,11 +4090,15 @@ genX(upload_ps_blend)(struct brw_context *brw)
        /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
        pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);
  
+      bool alpha_to_one = false;
+
        if (!buffer0_is_integer) {
           /* _NEW_MULTISAMPLE */
-         pb.AlphaToCoverageEnable =
-            _mesa_is_multisample_enabled(ctx) &&
-            ctx->Multisample.SampleAlphaToCoverage;
+
+         if (_mesa_is_multisample_enabled(ctx)) {
+            pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
+            alpha_to_one = ctx->Multisample.SampleAlphaToOne;
+         }
  
           pb.AlphaTestEnable = color->AlphaEnabled;
        }
@@ -4080,6 +4143,16 @@ genX(upload_ps_blend)(struct brw_context *brw)
              dstA = brw_fix_xRGB_alpha(dstA);
           }
  
+         /* Alpha to One doesn't work with Dual Color Blending.  Override
+          * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
+          */
+         if (alpha_to_one && color->Blend[0]._UsesDualSrc) {
+            srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+            srcA = fix_dual_blend_alpha_to_one(srcA);
+            dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+            dstA = fix_dual_blend_alpha_to_one(dstA);
+         }
+
           pb.ColorBufferBlendEnable = true;
           pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
           pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
@@ -4161,7 +4234,7 @@ genX(init_atoms)(struct brw_context *brw)
        /* These set up state for brw_psp_urb_cbs */
        &brw_wm_unit,
        &genX(sf_clip_viewport),
-      &brw_sf_unit,
+      &genX(sf_state),
        &genX(vs_state), /* always required, enabled or not */
        &brw_clip_unit,
        &brw_gs_unit,