i965: Convert SF_STATE to genxml.
[mesa.git] / src / mesa / drivers / dri / i965 / genX_state_upload.c
index a2ed2e72710989b3966778d9fbb209132d35fef2..a5a9d51bde4644892aa1b5e2c6539576b6129ef7 100644 (file)
@@ -154,6 +154,29 @@ vertex_bo(struct brw_bo *bo, uint32_t offset)
    };
 }
 
+#if GEN_GEN == 4 /* KSP yields a brw_address built from brw->cache.bo */
+static inline struct brw_address
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return instruction_bo(brw->cache.bo, offset);
+}
+/* Like KSP(), but built with instruction_ro_bo() rather than instruction_bo(). */
+static inline struct brw_address
+KSP_ro(struct brw_context *brw, uint32_t offset)
+{
+   return instruction_ro_bo(brw->cache.bo, offset);
+}
+#else /* GEN_GEN != 4: the kernel start pointer is just the raw offset */
+static inline uint32_t
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return offset;
+}
+/* No distinct read-only form in this branch: KSP_ro is an alias for KSP. */
+#define KSP_ro KSP
+
+#endif /* GEN_GEN == 4 */
+
 #include "genxml/genX_pack.h"
 
 #define _brw_cmd_length(cmd) cmd ## _length
@@ -344,7 +367,9 @@ genX(emit_vertex_buffer_state)(struct brw_context *brw,
 #endif
 #endif
 
-#if GEN_GEN == 9
+#if GEN_GEN == 10
+      .VertexBufferMOCS = CNL_MOCS_WB,
+#elif GEN_GEN == 9
       .VertexBufferMOCS = SKL_MOCS_WB,
 #elif GEN_GEN == 8
       .VertexBufferMOCS = BDW_MOCS_WB,
@@ -1316,7 +1341,7 @@ genX(upload_clip_state)(struct brw_context *brw)
          clip.ClipMode = CLIPMODE_NORMAL;
       }
 
-      clip.ClipEnable = brw->primitive != _3DPRIM_RECTLIST;
+      clip.ClipEnable = true;
 
       /* _NEW_POLYGON,
        * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
@@ -1355,7 +1380,6 @@ static const struct brw_tracked_state genX(clip_state) = {
 
 /* ---------------------------------------------------------------------- */
 
-#if GEN_GEN >= 6
 static void
 genX(upload_sf)(struct brw_context *brw)
 {
@@ -1365,11 +1389,48 @@ genX(upload_sf)(struct brw_context *brw)
 #if GEN_GEN <= 7
    /* _NEW_BUFFERS */
    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+   UNUSED const bool multisampled_fbo =
+      _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 #endif
 
+#if GEN_GEN < 6
+   const struct brw_sf_prog_data *sf_prog_data = brw->sf.prog_data;
+
+   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+
+   brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
+      sf.KernelStartPointer = KSP_ro(brw, brw->sf.prog_offset);
+      sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+      sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
+      sf.DispatchGRFStartRegisterForURBData = 3;
+      sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
+      sf.VertexURBEntryReadLength = sf_prog_data->urb_read_length;
+      sf.NumberofURBEntries = brw->urb.nr_sf_entries;
+      sf.URBEntryAllocationSize = brw->urb.sfsize - 1;
+
+      /* STATE_PREFETCH command description describes this state as being
+       * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
+       * domain.
+       */
+      sf.SetupViewportStateOffset =
+         instruction_ro_bo(brw->batch.bo, brw->sf.vp_offset);
+
+      sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+
+      /* sf.ConstantURBEntryReadLength = stage_prog_data->curb_read_length; */
+      /* sf.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2; */
+
+      sf.MaximumNumberofThreads =
+         MIN2(GEN_GEN == 5 ? 48 : 24, brw->urb.nr_sf_entries) - 1;
+
+      sf.SpritePointEnable = ctx->Point.PointSprite;
+
+      sf.DestinationOriginHorizontalBias = 0.5;
+      sf.DestinationOriginVerticalBias = 0.5;
+#else
    brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
       sf.StatisticsEnable = true;
+#endif
       sf.ViewportTransformEnable = true;
 
 #if GEN_GEN == 7
@@ -1380,6 +1441,7 @@ genX(upload_sf)(struct brw_context *brw)
 #if GEN_GEN <= 7
       /* _NEW_POLYGON */
       sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
+#if GEN_GEN >= 6
       sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
       sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
       sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
@@ -1412,6 +1474,14 @@ genX(upload_sf)(struct brw_context *brw)
             unreachable("not reached");
       }
 
+      if (multisampled_fbo && ctx->Multisample.Enabled)
+         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
+
+      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+#endif
+
       sf.ScissorRectangleEnable = true;
 
       if (ctx->Polygon.CullFlag) {
@@ -1436,12 +1506,6 @@ genX(upload_sf)(struct brw_context *brw)
       sf.LineStippleEnable = ctx->Line.StippleFlag;
 #endif
 
-      if (multisampled_fbo && ctx->Multisample.Enabled)
-         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
-
-      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
-      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
-      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
 #endif
 
       /* _NEW_LINE */
@@ -1477,7 +1541,9 @@ genX(upload_sf)(struct brw_context *brw)
          sf.SmoothPointEnable = true;
 #endif
 
+#if GEN_IS_G4X || GEN_GEN >= 5
       sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
+#endif
 
       /* _NEW_LIGHT */
       if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
@@ -1527,14 +1593,21 @@ static const struct brw_tracked_state genX(sf_state) = {
    .dirty = {
       .mesa  = _NEW_LIGHT |
                _NEW_LINE |
-               _NEW_MULTISAMPLE |
                _NEW_POINT |
                _NEW_PROGRAM |
+               (GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
                (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
       .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
                BRW_NEW_VUE_MAP_GEOM_OUT |
-               (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA |
+               (GEN_GEN <= 5 ? BRW_NEW_BATCH |
+                               BRW_NEW_PROGRAM_CACHE |
+                               BRW_NEW_SF_PROG_DATA |
+                               BRW_NEW_SF_VP |
+                               BRW_NEW_URB_FENCE
+                             : 0) |
+               (GEN_GEN >= 6 ? BRW_NEW_CONTEXT : 0) |
+               (GEN_GEN >= 6 && GEN_GEN <= 7 ?
+                               BRW_NEW_GS_PROG_DATA |
                                BRW_NEW_PRIMITIVE |
                                BRW_NEW_TES_PROG_DATA
                              : 0) |
@@ -1544,7 +1617,6 @@ static const struct brw_tracked_state genX(sf_state) = {
    },
    .emit = genX(upload_sf),
 };
-#endif
 
 /* ---------------------------------------------------------------------- */
 
@@ -1729,20 +1801,6 @@ static const struct brw_tracked_state genX(wm_state) = {
 
 /* ---------------------------------------------------------------------- */
 
-#if GEN_GEN == 4
-static inline struct brw_address
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return instruction_bo(brw->cache.bo, offset);
-}
-#else
-static inline uint32_t
-KSP(struct brw_context *brw, uint32_t offset)
-{
-   return offset;
-}
-#endif
-
 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
    pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset);           \
    pkt.SamplerCount       =                                               \
@@ -2435,6 +2493,20 @@ static const struct brw_tracked_state genX(gs_state) = {
 
 /* ---------------------------------------------------------------------- */
 
+/* Replace the two SRC1 alpha blend factors with constant factors. */
+UNUSED static GLenum
+fix_dual_blend_alpha_to_one(GLenum function)
+{
+   if (function == GL_SRC1_ALPHA)
+      return GL_ONE;
+
+   if (function == GL_ONE_MINUS_SRC1_ALPHA)
+      return GL_ZERO;
+
+   /* Any other blend factor passes through unchanged. */
+   return function;
+}
+
 #define blend_factor(x) brw_translate_blend_factor(x)
 #define blend_eqn(x) brw_translate_blend_equation(x)
 
@@ -2562,6 +2634,19 @@ genX(upload_blend_state)(struct brw_context *brw)
                dstA = brw_fix_xRGB_alpha(dstA);
             }
 
+            /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+             * "If Dual Source Blending is enabled, this bit must be disabled."
+             *
+             * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
+             * and leave it enabled anyway.
+             */
+            if (ctx->Color.Blend[i]._UsesDualSrc && blend.AlphaToOneEnable) {
+               srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+               srcA = fix_dual_blend_alpha_to_one(srcA);
+               dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+               dstA = fix_dual_blend_alpha_to_one(dstA);
+            }
+
             entry.ColorBufferBlendEnable = true;
             entry.DestinationBlendFactor = blend_factor(dstRGB);
             entry.SourceBlendFactor = blend_factor(srcRGB);
@@ -2600,16 +2685,6 @@ genX(upload_blend_state)(struct brw_context *brw)
          entry.WriteDisableBlue  = !ctx->Color.ColorMask[i][2];
          entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];
 
-         /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
-          * "If Dual Source Blending is enabled, this bit must be disabled."
-          */
-         WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
-                   _mesa_is_multisample_enabled(ctx) &&
-                   ctx->Multisample.SampleAlphaToOne,
-                   "HW workaround: disabling alpha to one with dual src "
-                   "blending\n");
-         if (ctx->Color.Blend[i]._UsesDualSrc)
-            blend.AlphaToOneEnable = false;
 #if GEN_GEN >= 8
          GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
 #else
@@ -2674,14 +2749,12 @@ upload_constant_state(struct brw_context *brw,
       pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
       if (active) {
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
-         pkt.ConstantBody.ConstantBuffer2ReadLength =
-            stage_state->push_const_size;
-         pkt.ConstantBody.PointerToConstantBuffer2 =
+         pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
+         pkt.ConstantBody.Buffer[2] =
             render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
 #else
-         pkt.ConstantBody.ConstantBuffer0ReadLength =
-            stage_state->push_const_size;
-         pkt.ConstantBody.PointerToConstantBuffer0.offset =
+         pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
+         pkt.ConstantBody.Buffer[0].offset =
             stage_state->push_const_offset | mocs;
 #endif
       }
@@ -3078,25 +3151,17 @@ genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
     * command feels strange -- each dword pair contains a SO_DECL per stream.
     */
    for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
-      int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
-      struct GENX(SO_DECL) decl = {0};
-      int varying = linked_xfb_info->Outputs[i].OutputRegister;
-      const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
-      unsigned component_mask = (1 << components) - 1;
-      unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
-      unsigned decl_buffer_slot = buffer;
+      const struct gl_transform_feedback_output *output =
+         &linked_xfb_info->Outputs[i];
+      const int buffer = output->OutputBuffer;
+      const int varying = output->OutputRegister;
+      const unsigned stream_id = output->StreamId;
       assert(stream_id < MAX_VERTEX_STREAMS);
 
-      component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
-
       buffer_mask[stream_id] |= 1 << buffer;
 
       assert(vue_map->varying_to_slot[varying] >= 0);
 
-      decl.OutputBufferSlot = decl_buffer_slot;
-      decl.RegisterIndex = vue_map->varying_to_slot[varying];
-      decl.ComponentMask = component_mask;
-
       /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
        * array.  Instead, it simply increments DstOffset for the following
        * input by the number of components that should be skipped.
@@ -3107,31 +3172,25 @@ genX(upload_3dstate_so_decl_list)(struct brw_context *brw,
        * program as many size = 4 holes as we can, then a final hole to
        * accommodate the final 1, 2, or 3 remaining.
        */
-      int skip_components =
-         linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
+      int skip_components = output->DstOffset - next_offset[buffer];
 
-      next_offset[buffer] += skip_components;
-
-      while (skip_components >= 4) {
-         struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
-         d->HoleFlag = 1;
-         d->OutputBufferSlot = decl_buffer_slot;
-         d->ComponentMask = 0xf;
+      while (skip_components > 0) {
+         so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+            .HoleFlag = 1,
+            .OutputBufferSlot = output->OutputBuffer,
+            .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
+         };
          skip_components -= 4;
       }
 
-      if (skip_components > 0) {
-         struct GENX(SO_DECL) *d = &so_decl[stream_id][decls[stream_id]++];
-         d->HoleFlag = 1;
-         d->OutputBufferSlot = decl_buffer_slot;
-         d->ComponentMask = (1 << skip_components) - 1;
-      }
+      next_offset[buffer] = output->DstOffset + output->NumComponents;
 
-      assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
-
-      next_offset[buffer] += components;
-
-      so_decl[stream_id][decls[stream_id]++] = decl;
+      so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
+         .OutputBufferSlot = output->OutputBuffer,
+         .RegisterIndex = vue_map->varying_to_slot[varying],
+         .ComponentMask =
+            ((1 << output->NumComponents) - 1) << output->ComponentOffset,
+      };
 
       if (decls[stream_id] > max_decls)
          max_decls = decls[stream_id];
@@ -3715,7 +3774,7 @@ genX(upload_cs_state)(struct brw_context *brw)
 
       const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
       vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
-      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;;
+      vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
       vfe.ResetGatewayTimer =
          Resettingrelativetimerandlatchingtheglobaltimestamp;
 #if GEN_GEN < 9
@@ -4031,11 +4090,15 @@ genX(upload_ps_blend)(struct brw_context *brw)
       /* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
       pb.HasWriteableRT = brw_color_buffer_write_enabled(brw);
 
+      bool alpha_to_one = false;
+
       if (!buffer0_is_integer) {
          /* _NEW_MULTISAMPLE */
-         pb.AlphaToCoverageEnable =
-            _mesa_is_multisample_enabled(ctx) &&
-            ctx->Multisample.SampleAlphaToCoverage;
+
+         if (_mesa_is_multisample_enabled(ctx)) {
+            pb.AlphaToCoverageEnable = ctx->Multisample.SampleAlphaToCoverage;
+            alpha_to_one = ctx->Multisample.SampleAlphaToOne;
+         }
 
          pb.AlphaTestEnable = color->AlphaEnabled;
       }
@@ -4080,6 +4143,16 @@ genX(upload_ps_blend)(struct brw_context *brw)
             dstA = brw_fix_xRGB_alpha(dstA);
          }
 
+         /* Alpha to One doesn't work with Dual Color Blending.  Override
+          * SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO.
+          */
+         if (alpha_to_one && color->Blend[0]._UsesDualSrc) {
+            srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+            srcA = fix_dual_blend_alpha_to_one(srcA);
+            dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+            dstA = fix_dual_blend_alpha_to_one(dstA);
+         }
+
          pb.ColorBufferBlendEnable = true;
          pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
          pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
@@ -4161,7 +4234,7 @@ genX(init_atoms)(struct brw_context *brw)
       /* These set up state for brw_psp_urb_cbs */
       &brw_wm_unit,
       &genX(sf_clip_viewport),
-      &brw_sf_unit,
+      &genX(sf_state),
       &genX(vs_state), /* always required, enabled or not */
       &brw_clip_unit,
       &brw_gs_unit,