i965: Port gen6+ 3DSTATE_SF to genxml.
author Rafael Antognolli <rafael.antognolli@intel.com>
Fri, 17 Mar 2017 17:23:45 +0000 (10:23 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 4 May 2017 01:57:51 +0000 (18:57 -0700)
Emit SF state (3DSTATE_SF) on Gen6+ with the brw_batch_emit helper, using the
pack structs from genxml.

v3:
   - Reorganize code and reduce #if/#endif's (Ken)
   - Style fixes (Ken)
   - Always set AALINEDISTANCE_TRUE (Ken)

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/gen6_sf_state.c
src/mesa/drivers/dri/i965/gen7_sf_state.c
src/mesa/drivers/dri/i965/gen8_sf_state.c
src/mesa/drivers/dri/i965/genX_state_upload.c

index 3a10a8a43e0ec1b88e6f11b9b2884d7e49ae9ea8..594757cac93d46134e09e617ff1cac9b92ba16a0 100644 (file)
@@ -119,7 +119,6 @@ extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
 extern const struct brw_tracked_state gen6_sampler_state;
 extern const struct brw_tracked_state gen6_scissor_state;
 extern const struct brw_tracked_state gen6_sol_surface;
-extern const struct brw_tracked_state gen6_sf_state;
 extern const struct brw_tracked_state gen6_sf_vp;
 extern const struct brw_tracked_state gen6_urb;
 extern const struct brw_tracked_state gen6_viewport_state;
@@ -137,7 +136,6 @@ extern const struct brw_tracked_state gen7_ps_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_sbe_state;
 extern const struct brw_tracked_state gen7_sf_clip_viewport;
-extern const struct brw_tracked_state gen7_sf_state;
 extern const struct brw_tracked_state gen7_sol_state;
 extern const struct brw_tracked_state gen7_te_state;
 extern const struct brw_tracked_state gen7_tes_push_constants;
@@ -157,7 +155,6 @@ extern const struct brw_tracked_state gen8_ps_extra;
 extern const struct brw_tracked_state gen8_ps_state;
 extern const struct brw_tracked_state gen8_wm_state;
 extern const struct brw_tracked_state gen8_sbe_state;
-extern const struct brw_tracked_state gen8_sf_state;
 extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
index 0f118b6678bd95361b78c7ee6b5cc22f648b5027..45b5769a40b8b6ab916e19462c46d5a3a4a22156 100644 (file)
@@ -263,192 +263,3 @@ calculate_attr_overrides(const struct brw_context *brw,
     */
    *urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
 }
-
-
-static void
-upload_sf_state(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_FS_PROG_DATA */
-   const struct brw_wm_prog_data *wm_prog_data =
-      brw_wm_prog_data(brw->wm.base.prog_data);
-   uint32_t num_outputs = wm_prog_data->num_varying_inputs;
-   uint32_t dw1, dw2, dw3, dw4;
-   uint32_t point_sprite_enables;
-   int i;
-   /* _NEW_BUFFER */
-   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-
-   float point_size;
-   uint16_t attr_overrides[16];
-   uint32_t point_sprite_origin;
-
-   dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
-   dw2 = GEN6_SF_STATISTICS_ENABLE;
-   dw3 = GEN6_SF_SCISSOR_ENABLE | GEN6_SF_LINE_AA_MODE_TRUE;
-   dw4 = 0;
-
-   if (brw->sf.viewport_transform_enable)
-       dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
-   /* _NEW_POLYGON */
-   if (ctx->Polygon._FrontBit == render_to_fbo)
-      dw2 |= GEN6_SF_WINDING_CCW;
-
-   if (ctx->Polygon.OffsetFill)
-       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
-
-   if (ctx->Polygon.OffsetLine)
-       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
-
-   if (ctx->Polygon.OffsetPoint)
-       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
-
-   switch (ctx->Polygon.FrontMode) {
-   case GL_FILL:
-       dw2 |= GEN6_SF_FRONT_SOLID;
-       break;
-
-   case GL_LINE:
-       dw2 |= GEN6_SF_FRONT_WIREFRAME;
-       break;
-
-   case GL_POINT:
-       dw2 |= GEN6_SF_FRONT_POINT;
-       break;
-
-   default:
-       unreachable("not reached");
-   }
-
-   switch (ctx->Polygon.BackMode) {
-   case GL_FILL:
-       dw2 |= GEN6_SF_BACK_SOLID;
-       break;
-
-   case GL_LINE:
-       dw2 |= GEN6_SF_BACK_WIREFRAME;
-       break;
-
-   case GL_POINT:
-       dw2 |= GEN6_SF_BACK_POINT;
-       break;
-
-   default:
-       unreachable("not reached");
-   }
-
-   /* _NEW_POLYGON */
-   if (ctx->Polygon.CullFlag) {
-      switch (ctx->Polygon.CullFaceMode) {
-      case GL_FRONT:
-        dw3 |= GEN6_SF_CULL_FRONT;
-        break;
-      case GL_BACK:
-        dw3 |= GEN6_SF_CULL_BACK;
-        break;
-      case GL_FRONT_AND_BACK:
-        dw3 |= GEN6_SF_CULL_BOTH;
-        break;
-      default:
-        unreachable("not reached");
-      }
-   } else {
-      dw3 |= GEN6_SF_CULL_NONE;
-   }
-
-   /* _NEW_LINE */
-   {
-      uint32_t line_width_u3_7 = brw_get_line_width(brw);
-      dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
-   }
-   if (ctx->Line.SmoothFlag) {
-      dw3 |= GEN6_SF_LINE_AA_ENABLE;
-      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
-   }
-   /* _NEW_MULTISAMPLE */
-   if (multisampled_fbo && ctx->Multisample.Enabled)
-      dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
-
-   /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (use_state_point_size(brw))
-      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
-   /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
-   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
-   /*
-    * Window coordinates in an FBO are inverted, which means point
-    * sprite origin must be inverted, too.
-    */
-   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
-      point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
-   } else {
-      point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
-   }
-   dw1 |= point_sprite_origin;
-
-   /* _NEW_LIGHT */
-   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
-      dw4 |=
-        (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
-        (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
-        (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
-   } else {
-      dw4 |=
-        (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
-   }
-
-   /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
-    * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
-    */
-   uint32_t urb_entry_read_length;
-   uint32_t urb_entry_read_offset;
-   calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
-                            &urb_entry_read_length, &urb_entry_read_offset);
-   dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
-           urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
-
-   BEGIN_BATCH(20);
-   OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
-   OUT_BATCH(dw1);
-   OUT_BATCH(dw2);
-   OUT_BATCH(dw3);
-   OUT_BATCH(dw4);
-   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
-   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
-   OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
-   for (i = 0; i < 8; i++) {
-      OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
-   }
-   OUT_BATCH(point_sprite_enables); /* dw16 */
-   OUT_BATCH(wm_prog_data->flat_inputs);
-   OUT_BATCH(0); /* wrapshortest enables 0-7 */
-   OUT_BATCH(0); /* wrapshortest enables 8-15 */
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen6_sf_state = {
-   .dirty = {
-      .mesa  = _NEW_BUFFERS |
-               _NEW_LIGHT |
-               _NEW_LINE |
-               _NEW_MULTISAMPLE |
-               _NEW_POINT |
-               _NEW_POLYGON |
-               _NEW_PROGRAM,
-      .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
-               BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_FS_PROG_DATA |
-               BRW_NEW_GS_PROG_DATA |
-               BRW_NEW_PRIMITIVE |
-               BRW_NEW_TES_PROG_DATA |
-               BRW_NEW_VUE_MAP_GEOM_OUT,
-   },
-   .emit = upload_sf_state,
-};
index d577a3601539ccf9550d438c2c3da7b5bd19bce2..7ab8a99b13b6da85c1fc2db4db98a5782b7d741d 100644 (file)
@@ -107,159 +107,3 @@ const struct brw_tracked_state gen7_sbe_state = {
    },
    .emit = upload_sbe_state,
 };
-
-static void
-upload_sf_state(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   uint32_t dw1, dw2, dw3;
-   float point_size;
-   /* _NEW_BUFFERS */
-   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
-
-   dw1 = GEN6_SF_STATISTICS_ENABLE;
-
-   if (brw->sf.viewport_transform_enable)
-       dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
-   /* _NEW_BUFFERS */
-   dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
-
-   /* _NEW_POLYGON */
-   if (ctx->Polygon._FrontBit == render_to_fbo)
-      dw1 |= GEN6_SF_WINDING_CCW;
-
-   if (ctx->Polygon.OffsetFill)
-       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
-
-   if (ctx->Polygon.OffsetLine)
-       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
-
-   if (ctx->Polygon.OffsetPoint)
-       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
-
-   switch (ctx->Polygon.FrontMode) {
-   case GL_FILL:
-       dw1 |= GEN6_SF_FRONT_SOLID;
-       break;
-
-   case GL_LINE:
-       dw1 |= GEN6_SF_FRONT_WIREFRAME;
-       break;
-
-   case GL_POINT:
-       dw1 |= GEN6_SF_FRONT_POINT;
-       break;
-
-   default:
-       unreachable("not reached");
-   }
-
-   switch (ctx->Polygon.BackMode) {
-   case GL_FILL:
-       dw1 |= GEN6_SF_BACK_SOLID;
-       break;
-
-   case GL_LINE:
-       dw1 |= GEN6_SF_BACK_WIREFRAME;
-       break;
-
-   case GL_POINT:
-       dw1 |= GEN6_SF_BACK_POINT;
-       break;
-
-   default:
-       unreachable("not reached");
-   }
-
-   dw2 = GEN6_SF_SCISSOR_ENABLE;
-
-   if (ctx->Polygon.CullFlag) {
-      switch (ctx->Polygon.CullFaceMode) {
-      case GL_FRONT:
-        dw2 |= GEN6_SF_CULL_FRONT;
-        break;
-      case GL_BACK:
-        dw2 |= GEN6_SF_CULL_BACK;
-        break;
-      case GL_FRONT_AND_BACK:
-        dw2 |= GEN6_SF_CULL_BOTH;
-        break;
-      default:
-        unreachable("not reached");
-      }
-   } else {
-      dw2 |= GEN6_SF_CULL_NONE;
-   }
-
-   /* _NEW_LINE */
-   {
-      uint32_t line_width_u3_7 = brw_get_line_width(brw);
-      dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
-   }
-   if (ctx->Line.SmoothFlag) {
-      dw2 |= GEN6_SF_LINE_AA_ENABLE;
-      dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
-   }
-   if (ctx->Line.StippleFlag && brw->is_haswell) {
-      dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
-   }
-   /* _NEW_MULTISAMPLE */
-   if (multisampled_fbo && ctx->Multisample.Enabled)
-      dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
-
-   /* FINISHME: Last Pixel Enable?  Vertex Sub Pixel Precision Select?
-    */
-
-   dw3 = GEN6_SF_LINE_AA_MODE_TRUE;
-
-   /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (use_state_point_size(brw))
-      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
-   /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
-   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
-   /* _NEW_LIGHT */
-   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
-      dw3 |=
-        (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
-        (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
-        (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
-   } else {
-      dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
-   }
-
-   BEGIN_BATCH(7);
-   OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
-   OUT_BATCH(dw1);
-   OUT_BATCH(dw2);
-   OUT_BATCH(dw3);
-   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
-   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
-   OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen7_sf_state = {
-   .dirty = {
-      .mesa  = _NEW_BUFFERS |
-               _NEW_LIGHT |
-               _NEW_LINE |
-               _NEW_MULTISAMPLE |
-               _NEW_POINT |
-               _NEW_POLYGON |
-               _NEW_PROGRAM,
-      .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
-               BRW_NEW_GS_PROG_DATA |
-               BRW_NEW_PRIMITIVE |
-               BRW_NEW_TES_PROG_DATA |
-               BRW_NEW_VUE_MAP_GEOM_OUT,
-   },
-   .emit = upload_sf_state,
-};
index d47adcdb3d028d827ec58ea4372d1c3091204139..3b1dd61643546a8c087782a9b21155a0add5facf 100644 (file)
@@ -151,76 +151,3 @@ const struct brw_tracked_state gen8_sbe_state = {
    },
    .emit = upload_sbe,
 };
-
-static void
-upload_sf(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->ctx;
-   uint32_t dw1 = 0, dw2 = 0, dw3 = 0;
-   float point_size;
-
-   dw1 = GEN6_SF_STATISTICS_ENABLE;
-
-   if (brw->sf.viewport_transform_enable)
-       dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
-
-   /* _NEW_LINE */
-   uint32_t line_width_u3_7 = brw_get_line_width(brw);
-   if (brw->gen >= 9 || brw->is_cherryview) {
-      dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT;
-   } else {
-      dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
-   }
-
-   if (ctx->Line.SmoothFlag) {
-      dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
-   }
-
-   /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
-   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
-
-   /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (use_state_point_size(brw))
-      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
-
-   /* _NEW_POINT | _NEW_MULTISAMPLE */
-   if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
-       !ctx->Point.PointSprite) {
-      dw3 |= GEN8_SF_SMOOTH_POINT_ENABLE;
-   }
-
-   dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
-
-   /* _NEW_LIGHT */
-   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
-      dw3 |= (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
-             (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
-             (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
-   } else {
-      dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
-   }
-
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2));
-   OUT_BATCH(dw1);
-   OUT_BATCH(dw2);
-   OUT_BATCH(dw3);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gen8_sf_state = {
-   .dirty = {
-      .mesa  = _NEW_LIGHT |
-               _NEW_PROGRAM |
-               _NEW_LINE |
-               _NEW_MULTISAMPLE |
-               _NEW_POINT,
-      .brw   = BRW_NEW_BLORP |
-               BRW_NEW_CONTEXT |
-               BRW_NEW_VUE_MAP_GEOM_OUT,
-   },
-   .emit = upload_sf,
-};
index ac467316183b8161ca120a77cff93eab755ca8fe..84d227173f10a8ad577b1ec0fce780857ae14ef7 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
+#include "brw_util.h"
 
 #include "intel_batchbuffer.h"
 #include "intel_fbo.h"
@@ -111,6 +112,235 @@ __gen_combine_address(struct brw_context *brw, void *location,
         _brw_cmd_pack(cmd)(brw, (void *)_dst, &name),              \
         _dst = NULL)
 
+#if GEN_GEN == 6
+/**
+ * Determine the appropriate attribute override value to store into the
+ * 3DSTATE_SF structure for a given fragment shader attribute.  The attribute
+ * override value contains two pieces of information: the location of the
+ * attribute in the VUE (relative to urb_entry_read_offset, see below), and a
+ * flag indicating whether to "swizzle" the attribute based on the direction
+ * the triangle is facing.
+ *
+ * If an attribute is "swizzled", then the given VUE location is used for
+ * front-facing triangles, and the VUE location that immediately follows is
+ * used for back-facing triangles.  We use this to implement the mapping from
+ * gl_FrontColor/gl_BackColor to gl_Color.
+ *
+ * urb_entry_read_offset is the offset into the VUE at which the SF unit is
+ * being instructed to begin reading attribute data.  It can be set to a
+ * nonzero value to prevent the SF unit from wasting time reading elements of
+ * the VUE that are not needed by the fragment shader.  It is measured in
+ * 256-bit increments.
+ *
+ * \param attr                   override entry to fill in.  Only the fields
+ *                               that need a non-zero value are assigned, so
+ *                               the entry must already be zeroed (the caller
+ *                               in this file passes a "{ 0 }" struct).
+ * \param vue_map                VUE map used to translate between varyings
+ *                               and VUE slots.
+ * \param urb_entry_read_offset  read offset described above.
+ * \param fs_attr                fragment shader varying to look up.
+ * \param two_side_color         whether front/back color swizzling may be
+ *                               requested.
+ * \param max_source_attr        in/out: raised to the highest source
+ *                               attribute this call makes the SF read
+ *                               (source attribute, plus one when swizzling).
+ *                               Left untouched on the early-return paths.
+ */
+static void
+genX(get_attr_override)(struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr,
+                        const struct brw_vue_map *vue_map,
+                        int urb_entry_read_offset, int fs_attr,
+                        bool two_side_color, uint32_t *max_source_attr)
+{
+   /* Find the VUE slot for this attribute. */
+   int slot = vue_map->varying_to_slot[fs_attr];
+
+   /* Viewport and Layer are stored in the VUE header.  We need to override
+    * them to zero if earlier stages didn't write them, as GL requires that
+    * they read back as zero when not explicitly set.
+    *
+    * X and W are always replaced by the constant; Y is replaced only when
+    * the layer slot is not valid and Z only when the viewport slot is not
+    * valid.  No source attribute is programmed on this path.
+    */
+   if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
+      attr->ComponentOverrideX = true;
+      attr->ComponentOverrideW = true;
+      attr->ConstantSource = CONST_0000;
+
+      if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+         attr->ComponentOverrideY = true;
+      if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+         attr->ComponentOverrideZ = true;
+
+      return;
+   }
+
+   /* If there was only a back color written but not front, use back
+    * as the color instead of undefined
+    */
+   if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
+      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
+   if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
+      slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
+
+   if (slot == -1) {
+      /* This attribute does not exist in the VUE--that means that the vertex
+       * shader did not write to it.  This means that either:
+       *
+       * (a) This attribute is a texture coordinate, and it is going to be
+       * replaced with point coordinates (as a consequence of a call to
+       * glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
+       * hardware will ignore whatever attribute override we supply.
+       *
+       * (b) This attribute is read by the fragment shader but not written by
+       * the vertex shader, so its value is undefined.  Therefore the
+       * attribute override we supply doesn't matter.
+       *
+       * (c) This attribute is gl_PrimitiveID, and it wasn't written by the
+       * previous shader stage.
+       *
+       * Note that we don't have to worry about the cases where the attribute
+       * is gl_PointCoord or is undergoing point sprite coordinate
+       * replacement, because in those cases, this function isn't called.
+       *
+       * In case (c), we need to program the attribute overrides so that the
+       * primitive ID will be stored in this slot.  In every other case, the
+       * attribute override we supply doesn't matter.  So just go ahead and
+       * program primitive ID in every case.
+       */
+      attr->ComponentOverrideW = true;
+      attr->ComponentOverrideX = true;
+      attr->ComponentOverrideY = true;
+      attr->ComponentOverrideZ = true;
+      attr->ConstantSource = PRIM_ID;
+      return;
+   }
+
+   /* Compute the location of the attribute relative to urb_entry_read_offset.
+    * Each increment of urb_entry_read_offset represents a 256-bit value, so
+    * it counts for two 128-bit VUE slots.
+    */
+   int source_attr = slot - 2 * urb_entry_read_offset;
+   assert(source_attr >= 0 && source_attr < 32);
+
+   /* If we are doing two-sided color, and the VUE slot following this one
+    * represents a back-facing color, then we need to instruct the SF unit to
+    * do back-facing swizzling.
+    */
+   bool swizzling = two_side_color &&
+      ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
+        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
+       (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
+        vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
+
+   /* Update max_source_attr.  If swizzling, the SF will read this slot + 1.
+    * The bool is used as 0 or 1 in the arithmetic below.
+    */
+   if (*max_source_attr < source_attr + swizzling)
+      *max_source_attr = source_attr + swizzling;
+
+   attr->SourceAttribute = source_attr;
+   if (swizzling)
+      attr->SwizzleSelect = INPUTATTR_FACING;
+}
+
+
+/**
+ * Compute the Gen6 3DSTATE_SF attribute overrides for every fragment shader
+ * input, together with the point sprite enable mask and the URB read window.
+ *
+ * \param brw                    context supplying the point state, the
+ *                               fragment program, the FS prog data and the
+ *                               geometry-output VUE map.
+ * \param attr_overrides         array indexed by FS input index; only the
+ *                               entries for input indices below 16 are
+ *                               written.
+ * \param point_sprite_enables   out: bit N is set when FS input N takes
+ *                               point sprite coordinates (only ever set
+ *                               while drawing points).
+ * \param urb_entry_read_length  out: DIV_ROUND_UP(max_source_attr + 1, 2).
+ * \param urb_entry_read_offset  out: 0 when the fragment shader reads the
+ *                               layer or viewport varying, 1 otherwise.
+ */
+static void
+genX(calculate_attr_overrides)(const struct brw_context *brw,
+                               struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr_overrides,
+                               uint32_t *point_sprite_enables,
+                               uint32_t *urb_entry_read_length,
+                               uint32_t *urb_entry_read_offset)
+{
+   const struct gl_context *ctx = &brw->ctx;
+
+   /* _NEW_POINT */
+   const struct gl_point_attrib *point = &ctx->Point;
+
+   /* BRW_NEW_FS_PROG_DATA */
+   const struct brw_wm_prog_data *wm_prog_data =
+      brw_wm_prog_data(brw->wm.base.prog_data);
+   uint32_t max_source_attr = 0;
+
+   *point_sprite_enables = 0;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM
+    *
+    * If the fragment shader reads VARYING_SLOT_LAYER or
+    * VARYING_SLOT_VIEWPORT, then we need to pass in the full vertex header.
+    * Otherwise, we can program the SF to start reading at an offset of 1
+    * (2 varying slots) to skip unnecessary data:
+    * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+    * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
+    */
+
+   bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
+      (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+
+   /* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
+    * description of dw10 Point Sprite Texture Coordinate Enable:
+    *
+    * "This field must be programmed to zero when non-point primitives
+    * are rendered."
+    *
+    * The SandyBridge PRM doesn't explicitly say that point sprite enables
+    * must be programmed to zero when rendering non-point primitives, but
+    * the IvyBridge PRM does, and if we don't, we get garbage.
+    *
+    * This is not required on Haswell, as the hardware ignores this state
+    * when drawing non-points -- although we do still need to be careful to
+    * correctly set the attr overrides.
+    *
+    * _NEW_POLYGON
+    * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
+    */
+   bool drawing_points = brw_is_drawing_points(brw);
+
+   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+      int input_index = wm_prog_data->urb_setup[attr];
+
+      /* Negative means the fragment shader does not read this varying. */
+      if (input_index < 0)
+         continue;
+
+      /* _NEW_POINT */
+      bool point_sprite = false;
+      if (drawing_points) {
+         if (point->PointSprite &&
+             (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
+             (point->CoordReplace & (1u << (attr - VARYING_SLOT_TEX0)))) {
+            point_sprite = true;
+         }
+
+         if (attr == VARYING_SLOT_PNTC)
+            point_sprite = true;
+
+         /* Shift an unsigned one, as the CoordReplace test above does:
+          * the mask is a uint32_t, and shifting a signed 1 into bit 31
+          * would be undefined behaviour.
+          */
+         if (point_sprite)
+            *point_sprite_enables |= (1u << input_index);
+      }
+
+      /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM
+       *
+       * Point sprite inputs keep the all-zero override.
+       */
+      struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attribute = { 0 };
+
+      if (!point_sprite) {
+         genX(get_attr_override)(&attribute,
+                                 &brw->vue_map_geom_out,
+                                 *urb_entry_read_offset, attr,
+                                 brw->ctx.VertexProgram._TwoSideEnabled,
+                                 &max_source_attr);
+      }
+
+      /* The hardware can only apply overrides to 16 attributes at a
+       * time; any further attributes (up to 16 more) have to be lined
+       * up so that the input index = the output index.  We'll need to
+       * do some tweaking to make sure that's the case.
+       */
+      if (input_index < 16)
+         attr_overrides[input_index] = attribute;
+      else
+         assert(attribute.SourceAttribute == input_index);
+   }
+
+   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
+    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
+    *
+    * "This field should be set to the minimum length required to read the
+    *  maximum source attribute.  The maximum source attribute is indicated
+    *  by the maximum value of the enabled Attribute # Source Attribute if
+    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+    *  enable is not set.
+    *  read_length = ceiling((max_source_attr + 1) / 2)
+    *
+    *  [errata] Corruption/Hang possible if length programmed larger than
+    *  recommended"
+    *
+    * Similar text exists for Ivy Bridge.
+    */
+   *urb_entry_read_length = DIV_ROUND_UP(max_source_attr + 1, 2);
+}
+#endif
+
 /* ---------------------------------------------------------------------- */
 
 #if GEN_GEN >= 6
@@ -338,6 +568,190 @@ static const struct brw_tracked_state genX(clip_state) = {
    .emit = genX(upload_clip_state),
 };
 
+/* ---------------------------------------------------------------------- */
+
+static void
+genX(upload_sf)(struct brw_context *brw) /* Emit 3DSTATE_SF (Gen6+). */
+{
+   struct gl_context *ctx = &brw->ctx;
+   float point_size;
+
+#if GEN_GEN <= 7
+   /* _NEW_BUFFERS - the draw buffer is only consulted on Gen7 and earlier. */
+   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+#endif
+
+   brw_batch_emit(brw, GENX(3DSTATE_SF), sf) {
+      sf.StatisticsEnable = true;
+      sf.ViewportTransformEnable = brw->sf.viewport_transform_enable;
+
+#if GEN_GEN == 7
+      /* _NEW_BUFFERS */
+      sf.DepthBufferSurfaceFormat = brw_depthbuffer_format(brw);
+#endif
+
+#if GEN_GEN <= 7
+      /* _NEW_POLYGON - everything down to the matching #endif (winding,
+       * depth offset, fill modes, scissor, culling, line stipple, MSAA
+       * rasterization) is compiled only for Gen7 and earlier.
+       *
+       * FrontWinding is set under the same condition for which the
+       * hand-packed code being replaced set GEN6_SF_WINDING_CCW.
+       */
+      sf.FrontWinding = ctx->Polygon._FrontBit == render_to_fbo;
+      sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
+      sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
+      sf.GlobalDepthOffsetEnablePoint = ctx->Polygon.OffsetPoint;
+
+      switch (ctx->Polygon.FrontMode) {
+         case GL_FILL:
+            sf.FrontFaceFillMode = FILL_MODE_SOLID;
+            break;
+         case GL_LINE:
+            sf.FrontFaceFillMode = FILL_MODE_WIREFRAME;
+            break;
+         case GL_POINT:
+            sf.FrontFaceFillMode = FILL_MODE_POINT;
+            break;
+         default:
+            unreachable("not reached");
+      }
+
+      switch (ctx->Polygon.BackMode) {
+         case GL_FILL:
+            sf.BackFaceFillMode = FILL_MODE_SOLID;
+            break;
+         case GL_LINE:
+            sf.BackFaceFillMode = FILL_MODE_WIREFRAME;
+            break;
+         case GL_POINT:
+            sf.BackFaceFillMode = FILL_MODE_POINT;
+            break;
+         default:
+            unreachable("not reached");
+      }
+
+      sf.ScissorRectangleEnable = true;
+
+      if (ctx->Polygon.CullFlag) {
+         switch (ctx->Polygon.CullFaceMode) {
+            case GL_FRONT:
+               sf.CullMode = CULLMODE_FRONT;
+               break;
+            case GL_BACK:
+               sf.CullMode = CULLMODE_BACK;
+               break;
+            case GL_FRONT_AND_BACK:
+               sf.CullMode = CULLMODE_BOTH;
+               break;
+            default:
+               unreachable("not reached");
+         }
+      } else {
+         sf.CullMode = CULLMODE_NONE;
+      }
+
+#if GEN_IS_HASWELL
+      sf.LineStippleEnable = ctx->Line.StippleFlag; /* _NEW_LINE */
+#endif
+
+      if (multisampled_fbo && ctx->Multisample.Enabled) /* _NEW_MULTISAMPLE */
+         sf.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
+
+      sf.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; /* x2 kept from the hand-packed code */
+      sf.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+      sf.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+#endif
+
+      /* _NEW_LINE - the width is handed over as a float here, where the
+       * hand-packed code shifted a fixed-point value into the dword itself.
+       */
+      sf.LineWidth = brw_get_line_width_float(brw);
+
+      if (ctx->Line.SmoothFlag) {
+         sf.LineEndCapAntialiasingRegionWidth = _10pixels; /* was GEN6_SF_LINE_END_CAP_WIDTH_1_0 */
+#if GEN_GEN <= 7
+         sf.AntiAliasingEnable = true;
+#endif
+      }
+
+      /* _NEW_POINT - Clamp to ARB_point_parameters user limits */
+      point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+      /* Clamp to the hardware limits.  NOTE(review): the hand-packed code
+       * also converted to fixed point with U_FIXED(..., 3) at this step;
+       * presumably the genxml pack function does that now - confirm.
+       */
+      sf.PointWidth = CLAMP(point_size, 0.125f, 255.875f);
+
+      /* _NEW_PROGRAM | _NEW_POINT, BRW_NEW_VUE_MAP_GEOM_OUT */
+      if (use_state_point_size(brw))
+         sf.PointWidthSource = State;
+
+#if GEN_GEN >= 8
+      /* _NEW_POINT | _NEW_MULTISAMPLE */
+      if ((ctx->Point.SmoothFlag || _mesa_is_multisample_enabled(ctx)) &&
+          !ctx->Point.PointSprite)
+         sf.SmoothPointEnable = true;
+#endif
+
+      sf.AALineDistanceMode = AALINEDISTANCE_TRUE; /* set on every generation */
+
+      /* _NEW_LIGHT - provoking vertex selection.  Unless the first-vertex
+       * convention is in use, triangle strips/lists and fans select 2 and
+       * line strips/lists select 1; with the first-vertex convention only
+       * the triangle fan select is assigned (1).
+       */
+      if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+         sf.TriangleStripListProvokingVertexSelect = 2;
+         sf.TriangleFanProvokingVertexSelect = 2;
+         sf.LineStripListProvokingVertexSelect = 1;
+      } else {
+         sf.TriangleFanProvokingVertexSelect = 1;
+      }
+
+#if GEN_GEN == 6
+      /* BRW_NEW_FS_PROG_DATA - the attribute setup below is Gen6 only. */
+      const struct brw_wm_prog_data *wm_prog_data =
+         brw_wm_prog_data(brw->wm.base.prog_data);
+
+      sf.AttributeSwizzleEnable = true;
+      sf.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
+
+      /*
+       * Window coordinates in an FBO are inverted, which means point
+       * sprite origin must be inverted, too.
+       */
+      if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
+         sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
+      } else {
+         sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
+      }
+
+      /* BRW_NEW_VUE_MAP_GEOM_OUT | BRW_NEW_FRAGMENT_PROGRAM |
+       * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
+       *
+       * The overrides are written straight into sf.Attribute; the three
+       * scalars come back through the out-parameters and are copied into
+       * the packet afterwards.
+       */
+      uint32_t urb_entry_read_length;
+      uint32_t urb_entry_read_offset;
+      uint32_t point_sprite_enables;
+      genX(calculate_attr_overrides)(brw, sf.Attribute, &point_sprite_enables,
+                                     &urb_entry_read_length,
+                                     &urb_entry_read_offset);
+      sf.VertexURBEntryReadLength = urb_entry_read_length;
+      sf.VertexURBEntryReadOffset = urb_entry_read_offset;
+      sf.PointSpriteTextureCoordinateEnable = point_sprite_enables;
+      sf.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
+#endif
+   }
+}
+
+static const struct brw_tracked_state genX(sf_state) = { /* 3DSTATE_SF atom */
+   .dirty = { /* dirty bits this atom is registered with */
+      .mesa  = _NEW_LIGHT |
+               _NEW_LINE |
+               _NEW_MULTISAMPLE |
+               _NEW_POINT |
+               _NEW_PROGRAM |
+               (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0), /* Gen7 and earlier only */
+      .brw   = BRW_NEW_BLORP |
+               BRW_NEW_CONTEXT |
+               BRW_NEW_VUE_MAP_GEOM_OUT |
+               (GEN_GEN <= 7 ? BRW_NEW_GS_PROG_DATA | /* Gen7 and earlier only */
+                               BRW_NEW_PRIMITIVE |
+                               BRW_NEW_TES_PROG_DATA
+                             : 0) |
+               (GEN_GEN == 6 ? BRW_NEW_FS_PROG_DATA | /* Gen6 attribute setup */
+                               BRW_NEW_FRAGMENT_PROGRAM
+                             : 0),
+   },
+   .emit = genX(upload_sf),
+};
+
 #endif
 
 /* ---------------------------------------------------------------------- */
@@ -570,7 +984,7 @@ genX(init_atoms)(struct brw_context *brw)
       &gen6_vs_state,
       &gen6_gs_state,
       &genX(clip_state),
-      &gen6_sf_state,
+      &genX(sf_state),
       &gen6_wm_state,
 
       &gen6_scissor_state,
@@ -659,7 +1073,7 @@ genX(init_atoms)(struct brw_context *brw)
       &gen7_sol_state,
       &genX(clip_state),
       &gen7_sbe_state,
-      &gen7_sf_state,
+      &genX(sf_state),
       &gen7_wm_state,
       &gen7_ps_state,
 
@@ -747,7 +1161,7 @@ genX(init_atoms)(struct brw_context *brw)
       &genX(clip_state),
       &genX(raster_state),
       &gen8_sbe_state,
-      &gen8_sf_state,
+      &genX(sf_state),
       &gen8_ps_blend,
       &gen8_ps_extra,
       &gen8_ps_state,