ilo: move ilo_state_3d* to core
author: Chia-I Wu <olvaffe@gmail.com>
Fri, 1 May 2015 03:47:13 +0000 (11:47 +0800)
committer: Chia-I Wu <olvaffe@gmail.com>
Sat, 2 May 2015 14:14:06 +0000 (22:14 +0800)
ilo state structs (struct ilo_xxx_state) are moved as well.

12 files changed:
src/gallium/drivers/ilo/Makefile.sources
src/gallium/drivers/ilo/core/ilo_state_3d.h [new file with mode: 0644]
src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c [new file with mode: 0644]
src/gallium/drivers/ilo/core/ilo_state_3d_top.c [new file with mode: 0644]
src/gallium/drivers/ilo/ilo_blitter_rectlist.c
src/gallium/drivers/ilo/ilo_builder_3d_top.h
src/gallium/drivers/ilo/ilo_shader.c
src/gallium/drivers/ilo/ilo_state.c
src/gallium/drivers/ilo/ilo_state.h
src/gallium/drivers/ilo/ilo_state_3d.h [deleted file]
src/gallium/drivers/ilo/ilo_state_3d_bottom.c [deleted file]
src/gallium/drivers/ilo/ilo_state_3d_top.c [deleted file]

index fbb33b4e10a95a9b1736704927d66fe56f22070a..1743f7c7659c16c9692dbcb516404223a7b908c6 100644 (file)
@@ -10,6 +10,9 @@ C_SOURCES := \
        core/ilo_fence.h \
        core/ilo_image.c \
        core/ilo_image.h \
+       core/ilo_state_3d.h \
+       core/ilo_state_3d_bottom.c \
+       core/ilo_state_3d_top.c \
        core/intel_winsys.h \
        ilo_blit.c \
        ilo_blit.h \
@@ -57,9 +60,6 @@ C_SOURCES := \
        ilo_shader.h \
        ilo_state.c \
        ilo_state.h \
-       ilo_state_3d.h \
-       ilo_state_3d_bottom.c \
-       ilo_state_3d_top.c \
        ilo_transfer.c \
        ilo_transfer.h \
        ilo_video.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h
new file mode 100644 (file)
index 0000000..e772f21
--- /dev/null
@@ -0,0 +1,424 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_3D_H
+#define ILO_STATE_3D_H
+
+#include "genhw/genhw.h"
+#include "pipe/p_state.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/**
+ * \see brw_context.h
+ */
+#define ILO_MAX_DRAW_BUFFERS    8
+#define ILO_MAX_CONST_BUFFERS   (1 + 12)
+#define ILO_MAX_SAMPLER_VIEWS   16
+#define ILO_MAX_SAMPLERS        16
+#define ILO_MAX_SO_BINDINGS     64
+#define ILO_MAX_SO_BUFFERS      4
+#define ILO_MAX_VIEWPORTS       1
+
+#define ILO_MAX_SURFACES        256
+
+struct intel_bo;
+struct ilo_buffer;
+struct ilo_shader_state;
+struct ilo_texture;
+
+/**
+ * Vertex buffer bindings: PIPE_MAX_ATTRIBS pipe_vertex_buffer slots and a
+ * bitmask over them.
+ */
+struct ilo_vb_state {
+   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
+   /* NOTE(review): presumably bit i marks states[i] as bound -- confirm */
+   uint32_t enabled_mask;
+};
+
+/**
+ * Index buffer state.  The first group of fields describes the index source
+ * (a resource or a user pointer, with an offset and an index size); the
+ * hw_* fields and draw_start_offset only become valid once the state has
+ * been finalized.
+ */
+struct ilo_ib_state {
+   struct pipe_resource *buffer;
+   const void *user_buffer;
+   unsigned offset;
+   unsigned index_size;
+
+   /* these are not valid until the state is finalized */
+   struct pipe_resource *hw_resource;
+   unsigned hw_index_size;
+   /* an offset to be added to pipe_draw_info::start */
+   int64_t draw_start_offset;
+};
+
+/** Hardware dwords describing a single vertex element. */
+struct ilo_ve_cso {
+   /* VERTEX_ELEMENT_STATE */
+   uint32_t payload[2];
+};
+
+/**
+ * Vertex element state: per-element hardware payloads in cso[], together
+ * with per-element instance divisors and a vertex buffer mapping table.
+ */
+struct ilo_ve_state {
+   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
+   /* NOTE(review): presumably the number of cso[] entries in use -- confirm */
+   unsigned count;
+
+   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
+   unsigned vb_count;
+
+   /* these are not valid until the state is finalized */
+   struct ilo_ve_cso edgeflag_cso;
+   bool last_cso_edgeflag;
+
+   struct ilo_ve_cso nosrc_cso;
+   bool prepend_nosrc_cso;
+};
+
+/** Stream output targets; the array holds ILO_MAX_SO_BUFFERS pointers. */
+struct ilo_so_state {
+   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
+   unsigned count;
+   /* NOTE(review): looks like one bit per target -- confirm the meaning */
+   unsigned append_bitmask;
+
+   bool enabled;
+};
+
+/**
+ * Values kept for one viewport: the transform in matrix form, the guardband
+ * and the screen-space extent.
+ */
+struct ilo_viewport_cso {
+   /* matrix form */
+   float m00, m11, m22, m30, m31, m32;
+
+   /* guardband in NDC space */
+   float min_gbx, min_gby, max_gbx, max_gby;
+
+   /* viewport in screen space */
+   float min_x, min_y, min_z;
+   float max_x, max_y, max_z;
+};
+
+/** Viewport state: ILO_MAX_VIEWPORTS viewport CSOs and a count. */
+struct ilo_viewport_state {
+   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
+   unsigned count;
+
+   /* NOTE(review): presumably the pipe state of viewport 0 -- confirm */
+   struct pipe_viewport_state viewport0;
+};
+
+/** Scissor state: two payload dwords per viewport. */
+struct ilo_scissor_state {
+   /* SCISSOR_RECT */
+   uint32_t payload[ILO_MAX_VIEWPORTS * 2];
+
+   /* NOTE(review): presumably the pipe state of scissor 0 -- confirm */
+   struct pipe_scissor_state scissor0;
+};
+
+/** Rasterizer-derived clip state, filled by rasterizer_init_clip(). */
+struct ilo_rasterizer_clip {
+   /* 3DSTATE_CLIP */
+   uint32_t payload[3];
+
+   /*
+    * Used as a boolean: rasterizer_init_clip() clears it for per-vertex or
+    * wide points and for smooth or wide lines, and sets it otherwise.
+    */
+   uint32_t can_enable_guardband;
+};
+
+/**
+ * Rasterizer-derived SF state, filled by rasterizer_init_sf_gen6() or
+ * rasterizer_init_sf_gen8().
+ */
+struct ilo_rasterizer_sf {
+   /* 3DSTATE_SF */
+   uint32_t payload[3];
+   /*
+    * Bits built from GEN7_SF_DW2_* values when the rasterizer state enables
+    * multisampling; zero otherwise, and always zero on Gen8.
+    */
+   uint32_t dw_msaa;
+
+   /* Global Depth Offset Constant/Scale/Clamp */
+   uint32_t dw_depth_offset_const;
+   uint32_t dw_depth_offset_scale;
+   uint32_t dw_depth_offset_clamp;
+
+   /* Gen8+ 3DSTATE_RASTER */
+   uint32_t dw_raster;
+};
+
+/**
+ * Rasterizer-derived WM state.  The multisample rasterization and dispatch
+ * mode bits are kept apart from the payload dwords.
+ */
+struct ilo_rasterizer_wm {
+   /* 3DSTATE_WM */
+   uint32_t payload[2];
+   uint32_t dw_msaa_rast;
+   uint32_t dw_msaa_disp;
+};
+
+/**
+ * Rasterizer CSO: a pipe_rasterizer_state next to the CLIP, SF and WM values
+ * derived by ilo_gpe_init_rasterizer().
+ */
+struct ilo_rasterizer_state {
+   /* not written by ilo_gpe_init_rasterizer() */
+   struct pipe_rasterizer_state state;
+
+   struct ilo_rasterizer_clip clip;
+   struct ilo_rasterizer_sf sf;
+   struct ilo_rasterizer_wm wm;
+};
+
+/** Depth/stencil/alpha CSO. */
+struct ilo_dsa_state {
+   /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
+   uint32_t payload[3];
+
+   /* NOTE(review): presumably alpha test bits for the blend states -- confirm */
+   uint32_t dw_blend_alpha;
+   uint32_t dw_ps_blend_alpha;
+   ubyte alpha_ref;
+};
+
+/** Blend values kept for one draw buffer. */
+struct ilo_blend_cso {
+   /* BLEND_STATE */
+   uint32_t payload[2];
+
+   uint32_t dw_blend;
+   /* NOTE(review): presumably dw_blend for targets whose dst alpha is 1 */
+   uint32_t dw_blend_dst_alpha_forced_one;
+};
+
+/**
+ * Blend CSO: one ilo_blend_cso per draw buffer plus values that are not
+ * stored per draw buffer.
+ */
+struct ilo_blend_state {
+   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
+
+   bool dual_blend;
+   bool alpha_to_coverage;
+
+   uint32_t dw_shared;
+   uint32_t dw_alpha_mod;
+   uint32_t dw_logicop;
+
+   /* a part of 3DSTATE_PS_BLEND */
+   uint32_t dw_ps_blend;
+   uint32_t dw_ps_blend_dst_alpha_forced_one;
+};
+
+/**
+ * Sampler CSO: the hardware payload plus alternative filter and wrap dwords
+ * and a few flags.
+ */
+struct ilo_sampler_cso {
+   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
+   uint32_t payload[15];
+
+   /* NOTE(review): presumably selected at emit time -- confirm the users */
+   uint32_t dw_filter;
+   uint32_t dw_filter_aniso;
+   uint32_t dw_wrap;
+   uint32_t dw_wrap_1d;
+   uint32_t dw_wrap_cube;
+
+   bool anisotropic;
+   bool saturate_r;
+   bool saturate_s;
+   bool saturate_t;
+};
+
+/** Pointers to ILO_MAX_SAMPLERS sampler CSOs; the CSOs are not modified here. */
+struct ilo_sampler_state {
+   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
+};
+
+/** A surface state payload and the buffer object it refers to. */
+struct ilo_view_surface {
+   /* SURFACE_STATE */
+   uint32_t payload[13];
+   struct intel_bo *bo;
+
+   /* NOTE(review): presumably a flag for scanout surfaces -- confirm */
+   uint32_t scanout;
+};
+
+/** Sampler view CSO: the pipe_sampler_view followed by its surface state. */
+struct ilo_view_cso {
+   /* first member, so the struct starts with a pipe_sampler_view */
+   struct pipe_sampler_view base;
+
+   struct ilo_view_surface surface;
+};
+
+/** Sampler view pointers; the array has ILO_MAX_SAMPLER_VIEWS entries. */
+struct ilo_view_state {
+   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
+   unsigned count;
+};
+
+/**
+ * Constant buffer CSO: either a resource with its surface state, or a user
+ * buffer described by a pointer and a size.
+ */
+struct ilo_cbuf_cso {
+   struct pipe_resource *resource;
+   struct ilo_view_surface surface;
+
+   /*
+    * this CSO is not so constant because user buffer needs to be uploaded in
+    * finalize_constant_buffers()
+    */
+   const void *user_buffer;
+   unsigned user_buffer_size;
+};
+
+/** ILO_MAX_CONST_BUFFERS constant buffer CSOs and a bitmask over them. */
+struct ilo_cbuf_state {
+   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
+   /* NOTE(review): presumably bit i marks cso[i] as enabled -- confirm */
+   uint32_t enabled_mask;
+};
+
+/** Shader resource surfaces; PIPE_MAX_SHADER_RESOURCES pointers. */
+struct ilo_resource_state {
+   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
+   unsigned count;
+};
+
+/**
+ * Surface CSO: a pipe_surface followed by either render target or
+ * depth/stencil hardware state.
+ */
+struct ilo_surface_cso {
+   struct pipe_surface base;
+
+   /* NOTE(review): presumably selects u.rt (true) or u.zs (false) -- confirm */
+   bool is_rt;
+   union {
+      struct ilo_view_surface rt;
+      /* depth/stencil state; the struct tag is also used outside the union */
+      struct ilo_zs_surface {
+         uint32_t payload[12];
+         uint32_t dw_aligned_8x4;
+
+         struct intel_bo *bo;
+         struct intel_bo *hiz_bo;
+         struct intel_bo *separate_s8_bo;
+      } zs;
+   } u;
+};
+
+/**
+ * Framebuffer state: the pipe framebuffer state, null render target and
+ * depth/stencil surfaces, and blend capabilities per color buffer.
+ */
+struct ilo_fb_state {
+   struct pipe_framebuffer_state state;
+
+   struct ilo_view_surface null_rt;
+   struct ilo_zs_surface null_zs;
+
+   /* one entry per color buffer, PIPE_MAX_COLOR_BUFS in total */
+   struct ilo_fb_blend_caps {
+      bool can_logicop;
+      bool can_blend;
+      bool can_alpha_test;
+      bool dst_alpha_forced_one;
+   } blend_caps[PIPE_MAX_COLOR_BUFS];
+
+   unsigned num_samples;
+};
+
+/** Hardware dwords kept for one shader; filled by ilo_gpe_init_{vs,gs,fs}_cso(). */
+struct ilo_shader_cso {
+   uint32_t payload[5];
+};
+
+/**
+ * Map a pipe texture target onto the GEN6_SURFTYPE_* value used for it.
+ *
+ * Array targets share the surface type of their non-array counterpart and
+ * PIPE_TEXTURE_RECT is treated as 2D.  An unrecognized target trips an
+ * assertion and, when assertions are compiled out, yields
+ * GEN6_SURFTYPE_BUFFER.
+ */
+static inline int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
+{
+   /* value for PIPE_BUFFER and for the unknown-target fallback */
+   int surftype = GEN6_SURFTYPE_BUFFER;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      surftype = GEN6_SURFTYPE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      surftype = GEN6_SURFTYPE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      surftype = GEN6_SURFTYPE_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      surftype = GEN6_SURFTYPE_CUBE;
+      break;
+   default:
+      assert(!"unknown texture target");
+      break;
+   }
+
+   return surftype;
+}
+
+/*
+ * State translation entry points.  Each one takes the device description as
+ * its first argument; the last argument is a non-const pointer, presumably
+ * the object being initialized or updated.
+ */
+
+void
+ilo_gpe_init_ve(const struct ilo_dev *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve);
+
+void
+ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
+                        struct ilo_ve_cso *cso);
+
+void
+ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
+                      int comp0, int comp1, int comp2, int comp3,
+                      struct ilo_ve_cso *cso);
+
+void
+ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
+                         const struct pipe_viewport_state *state,
+                         struct ilo_viewport_cso *vp);
+
+void
+ilo_gpe_set_scissor(const struct ilo_dev *dev,
+                    unsigned start_slot,
+                    unsigned num_states,
+                    const struct pipe_scissor_state *states,
+                    struct ilo_scissor_state *scissor);
+
+void
+ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
+                         struct ilo_scissor_state *scissor);
+
+/* fills the clip, sf and wm members of \p rasterizer from \p state */
+void
+ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_state *rasterizer);
+void
+ilo_gpe_init_dsa(const struct ilo_dev *dev,
+                 const struct pipe_depth_stencil_alpha_state *state,
+                 struct ilo_dsa_state *dsa);
+
+void
+ilo_gpe_init_blend(const struct ilo_dev *dev,
+                   const struct pipe_blend_state *state,
+                   struct ilo_blend_state *blend);
+
+void
+ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
+                         const struct pipe_sampler_state *state,
+                         struct ilo_sampler_cso *sampler);
+
+void
+ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
+                               unsigned width, unsigned height,
+                               unsigned depth, unsigned level,
+                               struct ilo_view_surface *surf);
+
+void
+ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
+                                     const struct ilo_buffer *buf,
+                                     unsigned offset, unsigned size,
+                                     unsigned struct_size,
+                                     enum pipe_format elem_format,
+                                     bool is_rt, bool render_cache_rw,
+                                     struct ilo_view_surface *surf);
+
+void
+ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
+                                      const struct ilo_texture *tex,
+                                      enum pipe_format format,
+                                      unsigned first_level,
+                                      unsigned num_levels,
+                                      unsigned first_layer,
+                                      unsigned num_layers,
+                                      bool is_rt,
+                                      struct ilo_view_surface *surf);
+
+void
+ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
+                        const struct ilo_texture *tex,
+                        enum pipe_format format, unsigned level,
+                        unsigned first_layer, unsigned num_layers,
+                        struct ilo_zs_surface *zs);
+
+void
+ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *vs,
+                    struct ilo_shader_cso *cso);
+
+void
+ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *gs,
+                    struct ilo_shader_cso *cso);
+
+void
+ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *fs,
+                    struct ilo_shader_cso *cso);
+
+void
+ilo_gpe_set_fb(const struct ilo_dev *dev,
+               const struct pipe_framebuffer_state *state,
+               struct ilo_fb_state *fb);
+
+#endif /* ILO_STATE_3D_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
new file mode 100644 (file)
index 0000000..291c86b
--- /dev/null
@@ -0,0 +1,2223 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "util/u_dual_blend.h"
+#include "util/u_framebuffer.h"
+#include "util/u_half.h"
+
+#include "ilo_format.h"
+#include "ilo_state_3d.h"
+#include "../ilo_resource.h"
+#include "../ilo_shader.h"
+
+/**
+ * Derive the rasterizer-dependent dwords of 3DSTATE_CLIP.
+ *
+ * \param dev    device description; asserted to be in the Gen6..Gen8 range
+ * \param state  pipe rasterizer state to translate
+ * \param clip   receives three payload dwords and the guard band hint
+ */
+static void
+rasterizer_init_clip(const struct ilo_dev *dev,
+                     const struct pipe_rasterizer_state *state,
+                     struct ilo_rasterizer_clip *clip)
+{
+   int tri_provoke, line_provoke, trifan_provoke;
+   bool wide_points, wide_lines;
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   dw1 = GEN6_CLIP_DW1_STATISTICS;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
+       *
+       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
+       *      enabled only for the cases where the incoming primitive topology
+       *      into the clipper guaranteed to be Trilist."
+       *
+       * What does this mean?
+       */
+      dw1 |= 0 << 19 |
+             GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
+   }
+
+   /* winding and cull mode are only programmed here for Gen7 and Gen7.5 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) < ILO_GEN(8)) {
+      if (state->front_ccw)
+         dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
+
+      switch (state->cull_face) {
+      case PIPE_FACE_NONE:
+         dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
+         break;
+      case PIPE_FACE_FRONT:
+         dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
+         break;
+      case PIPE_FACE_BACK:
+         dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
+         break;
+      case PIPE_FACE_FRONT_AND_BACK:
+         dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
+         break;
+      }
+   }
+
+   dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
+         GEN6_CLIP_DW2_XY_TEST_ENABLE |
+         GEN6_CLIP_DW2_CLIPMODE_NORMAL;
+
+   dw2 |= state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT;
+
+   dw2 |= (state->clip_halfz) ?
+      GEN6_CLIP_DW2_APIMODE_D3D : GEN6_CLIP_DW2_APIMODE_OGL;
+
+   /* the Z test bit is only set here before Gen8 */
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip)
+      dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
+
+   /* provoking vertex indices for triangles, lines and triangle fans */
+   tri_provoke = (state->flatshade_first) ? 0 : 2;
+   line_provoke = (state->flatshade_first) ? 0 : 1;
+   trifan_provoke = (state->flatshade_first) ? 1 : 2;
+
+   dw2 |= tri_provoke << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+          line_provoke << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+          trifan_provoke << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+
+   dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
+         0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
+
+   clip->payload[0] = dw1;
+   clip->payload[1] = dw2;
+   clip->payload[2] = dw3;
+
+   /*
+    * There are several reasons that the guard band test should be disabled
+    *
+    *  - GL wide points (to avoid partially visible objects)
+    *  - GL wide or AA lines (to avoid partially visible objects)
+    */
+   wide_points = state->point_size_per_vertex || state->point_size > 1.0f;
+   wide_lines = state->line_smooth || state->line_width > 1.0f;
+
+   clip->can_enable_guardband = !(wide_points || wide_lines);
+}
+
+/**
+ * Store the global depth offset constant, scale and clamp of \p state in
+ * \p sf, each passed through fui().
+ */
+static void
+rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
+                                     const struct pipe_rasterizer_state *state,
+                                     struct ilo_rasterizer_sf *sf)
+{
+   float offset_const, offset_scale, offset_clamp;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Scale the constant term.  The minimum representable value used by the HW
+    * is not large enough to be the minimum resolvable difference.
+    */
+   offset_const = state->offset_units * 2.0f;
+   offset_scale = state->offset_scale;
+   offset_clamp = state->offset_clamp;
+
+   sf->dw_depth_offset_const = fui(offset_const);
+   sf->dw_depth_offset_scale = fui(offset_scale);
+   sf->dw_depth_offset_clamp = fui(offset_clamp);
+}
+
+/**
+ * Derive the rasterizer-dependent 3DSTATE_SF dwords for Gen6 to Gen7.5.
+ *
+ * Stores three dwords in sf->payload, the multisample-only bits in
+ * sf->dw_msaa and the depth offset dwords; sf->dw_raster is zeroed.
+ */
+static void
+rasterizer_init_sf_gen6(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_sf *sf)
+{
+   int line_width, point_width;
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "This bit (Statistics Enable) should be set whenever clipping is
+    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
+    *      should be cleared if clipping is disabled or Statistics Enable in
+    *      CLIP_STATE is clear."
+    */
+   dw1 = GEN7_SF_DW1_STATISTICS |
+         GEN7_SF_DW1_VIEWPORT_ENABLE;
+
+   /*
+    * XXX GEN6 path seems to work fine for GEN7; the "false &&" keeps the
+    * GEN7-specific branch below disabled
+    */
+   if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
+       *
+       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
+       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
+       *      Depth Offset Enable Point) should be set whenever non zero depth
+       *      bias (Slope, Bias) values are used. Setting this bit may have
+       *      some degradation of performance for some workloads."
+       */
+      if (state->offset_tri || state->offset_line || state->offset_point) {
+         /* XXX need to scale offset_const according to the depth format */
+         dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
+
+         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
+                GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
+                GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+      }
+   } else {
+      if (state->offset_tri)
+         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
+      if (state->offset_line)
+         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
+      if (state->offset_point)
+         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+   }
+
+   switch (state->fill_front) {
+   case PIPE_POLYGON_MODE_FILL:
+      dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
+      break;
+   }
+
+   switch (state->fill_back) {
+   case PIPE_POLYGON_MODE_FILL:
+      dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
+      break;
+   }
+
+   if (state->front_ccw)
+      dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
+
+   dw2 = 0;
+
+   if (state->line_smooth) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+       *
+       *     "This field (Anti-aliasing Enable) must be disabled if any of the
+       *      render targets have integer (UINT or SINT) surface format."
+       *
+       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+       *
+       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
+       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
+       *
+       * TODO We do not check those yet.
+       */
+      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
+             GEN7_SF_DW2_AA_LINE_CAP_1_0;
+   }
+
+   switch (state->cull_face) {
+   case PIPE_FACE_NONE:
+      dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
+      break;
+   }
+
+   /*
+    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+    * pixels in the minor direction.  We have to make the lines slightly
+    * thicker, 0.5 pixel on both sides, so that the pixels they intersect
+    * are considered part of the lines.
+    *
+    * Line width is in U3.7.
+    */
+   line_width = (int)
+      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
+   line_width = CLAMP(line_width, 0, 1023);
+
+   /* use GIQ rules */
+   if (line_width == 128 && !state->line_smooth)
+      line_width = 0;
+
+   dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+
+   /* the line stipple enable is only set in this dword on Gen7.5 */
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
+      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
+
+   if (state->scissor)
+      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
+
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS;
+
+   if (state->line_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   /* provoking vertex indices for triangles, lines and triangle fans */
+   if (state->flatshade_first) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   if (!state->point_size_per_vertex)
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
+
+   /* in U8.3 */
+   point_width = (int) (state->point_size * 8.0f + 0.5f);
+   point_width = CLAMP(point_width, 1, 2047);
+
+   dw3 |= point_width;
+
+   STATIC_ASSERT(Elements(sf->payload) >= 3);
+   sf->payload[0] = dw1;
+   sf->payload[1] = dw2;
+   sf->payload[2] = dw3;
+
+   if (state->multisample) {
+      sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+       *
+       *     "Software must not program a value of 0.0 when running in
+       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
+       *      when multisampling rasterization is enabled."
+       */
+      if (!line_width) {
+         line_width = 128; /* 1.0f */
+
+         /* only dw_msaa gets the replacement width; payload[1] keeps 0 */
+         sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+      }
+   } else {
+      sf->dw_msaa = 0;
+   }
+
+   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
+   /* 3DSTATE_RASTER is Gen8+ only */
+   sf->dw_raster = 0;
+}
+
+/**
+ * Build a dword of GEN8_RASTER_DW1_* bits from the rasterizer state.
+ *
+ * \return the assembled dword; rasterizer_init_sf_gen8() stores it in
+ *         ilo_rasterizer_sf::dw_raster
+ */
+static uint32_t
+rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
+                              const struct pipe_rasterizer_state *state)
+{
+   uint32_t dw1 = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* single-bit enables */
+   if (state->front_ccw)
+      dw1 |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
+   if (state->point_smooth)
+      dw1 |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
+   if (state->multisample)
+      dw1 |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
+   if (state->line_smooth)
+      dw1 |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+   if (state->scissor)
+      dw1 |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
+   if (state->depth_clip)
+      dw1 |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
+
+   /* depth offset enables, one per fill mode */
+   if (state->offset_tri)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
+   if (state->offset_line)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (state->offset_point)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
+
+   switch (state->cull_face) {
+   case PIPE_FACE_NONE:
+      dw1 |= GEN8_RASTER_DW1_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      dw1 |= GEN8_RASTER_DW1_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      dw1 |= GEN8_RASTER_DW1_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      dw1 |= GEN8_RASTER_DW1_CULLMODE_BOTH;
+      break;
+   }
+
+   switch (state->fill_front) {
+   case PIPE_POLYGON_MODE_FILL:
+      dw1 |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      dw1 |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      dw1 |= GEN8_RASTER_DW1_FRONTFACE_POINT;
+      break;
+   }
+
+   switch (state->fill_back) {
+   case PIPE_POLYGON_MODE_FILL:
+      dw1 |= GEN8_RASTER_DW1_BACKFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      dw1 |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      dw1 |= GEN8_RASTER_DW1_BACKFACE_POINT;
+      break;
+   }
+
+   return dw1;
+}
+
+/**
+ * Derive the rasterizer-dependent 3DSTATE_SF dwords for Gen8.
+ *
+ * Stores three dwords in sf->payload, the depth offset dwords, and the
+ * value of rasterizer_get_sf_raster_gen8() in sf->dw_raster.  sf->dw_msaa is
+ * always zero here.
+ */
+static void
+rasterizer_init_sf_gen8(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_sf *sf)
+{
+   int line_width, point_width;
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* in U3.7 */
+   line_width = (int)
+      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
+   line_width = CLAMP(line_width, 0, 1023);
+
+   /* use GIQ rules */
+   if (line_width == 128 && !state->line_smooth)
+      line_width = 0;
+
+   /* in U8.3 */
+   point_width = (int) (state->point_size * 8.0f + 0.5f);
+   point_width = CLAMP(point_width, 1, 2047);
+
+   dw1 = GEN7_SF_DW1_STATISTICS |
+         GEN7_SF_DW1_VIEWPORT_ENABLE;
+
+   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+   if (state->line_smooth)
+      dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
+
+   /* point_width is merged exactly once, here */
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS |
+         point_width;
+
+   if (state->line_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   /* provoking vertex indices for triangles, lines and triangle fans */
+   if (state->flatshade_first) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   if (!state->point_size_per_vertex)
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
+
+   STATIC_ASSERT(Elements(sf->payload) >= 3);
+   sf->payload[0] = dw1;
+   sf->payload[1] = dw2;
+   sf->payload[2] = dw3;
+
+   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
+
+   sf->dw_msaa = 0;
+   sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
+}
+
+/**
+ * Derive the rasterizer-dependent dwords of 3DSTATE_WM for Gen6.
+ *
+ * wm->payload receives DW5 and DW6; the multisample rasterization and
+ * dispatch mode bits are stored separately in wm->dw_msaa_rast and
+ * wm->dw_msaa_disp.
+ */
+static void
+rasterizer_init_wm_gen6(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_wm *wm)
+{
+   uint32_t dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   STATIC_ASSERT(Elements(wm->payload) >= 2);
+
+   /*
+    * assertion that makes sure
+    *
+    *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
+    *
+    * is valid
+    */
+   STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
+                 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
+
+   /* only the FF unit states are set, as in GEN7 */
+   dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
+   dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
+
+   if (state->poly_stipple_enable)
+      dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
+
+   if (state->bottom_edge_rule)
+      dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
+
+   wm->payload[0] = dw5;
+   wm->payload[1] = dw6;
+
+   if (state->multisample)
+      wm->dw_msaa_rast = GEN6_WM_DW6_MSRASTMODE_ON_PATTERN;
+   else
+      wm->dw_msaa_rast = 0;
+
+   wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
+}
+
+/**
+ * Derive the rasterizer-dependent dwords of 3DSTATE_WM for Gen7 and Gen7.5.
+ *
+ * wm->payload receives DW1 and DW2 (DW2 is zero here); the multisample
+ * rasterization and dispatch mode bits are stored separately in
+ * wm->dw_msaa_rast and wm->dw_msaa_disp.
+ */
+static void
+rasterizer_init_wm_gen7(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_wm *wm)
+{
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   STATIC_ASSERT(Elements(wm->payload) >= 2);
+
+   /*
+    * assertion that makes sure
+    *
+    *   dw1 |= wm->dw_msaa_rast;
+    *   dw2 |= wm->dw_msaa_disp;
+    *
+    * is valid
+    */
+   STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
+                 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
+
+   dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
+         GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
+
+   if (state->poly_stipple_enable)
+      dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+
+   if (state->bottom_edge_rule)
+      dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   wm->payload[0] = dw1;
+   /* no rasterizer-dependent bits are set in DW2 */
+   wm->payload[1] = 0;
+
+   if (state->multisample)
+      wm->dw_msaa_rast = GEN7_WM_DW1_MSRASTMODE_ON_PATTERN;
+   else
+      wm->dw_msaa_rast = 0;
+
+   wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
+}
+
+/**
+ * Build the rasterizer-dependent 3DSTATE_WM dword for Gen8 out of
+ * GEN7_WM_DW1_* bits.
+ *
+ * \return the assembled dword; ilo_gpe_init_rasterizer() stores it in
+ *         ilo_rasterizer_wm::payload[0]
+ */
+static uint32_t
+rasterizer_get_wm_gen8(const struct ilo_dev *dev,
+                       const struct pipe_rasterizer_state *state)
+{
+   uint32_t wm_dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
+                     GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->poly_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      wm_dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
+
+   if (state->bottom_edge_rule)
+      wm_dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   return wm_dw1;
+}
+
+/**
+ * Initialize the clip, sf and wm members of \p rasterizer from \p state,
+ * picking the WM and SF helpers that match the device generation.  The
+ * state member of \p rasterizer is not written.
+ */
+void
+ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_state *rasterizer)
+{
+   struct ilo_rasterizer_wm *wm = &rasterizer->wm;
+   struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+   rasterizer_init_clip(dev, state, &rasterizer->clip);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      /* only payload[0] carries a value on Gen8; the rest stays zero */
+      memset(wm, 0, sizeof(*wm));
+      wm->payload[0] = rasterizer_get_wm_gen8(dev, state);
+
+      rasterizer_init_sf_gen8(dev, state, sf);
+      return;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      rasterizer_init_wm_gen7(dev, state, wm);
+   else
+      rasterizer_init_wm_gen6(dev, state, wm);
+
+   /* Gen6, Gen7 and Gen7.5 share the SF path */
+   rasterizer_init_sf_gen6(dev, state, sf);
+}
+
+/**
+ * Initialize the fragment shader CSO for GEN6.
+ *
+ * The kernel parameters of \p fs are packed into four dwords, named dw2, dw4,
+ * dw5, and dw6 after the GEN6_WM_DW* fields they are built from, and stored
+ * in cso->payload[0..3] in that order.
+ */
+static void
+fs_init_cso_gen6(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   int start_grf, input_count, sampler_count, interps, max_threads;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+   interps = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+
+   /* see brwCreateContext() */
+   max_threads = (dev->gt == 2) ? 80 : 40;
+
+   /* the alternate FP mode is never selected; samplers are counted in 4s */
+   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   /* only the first kernel (dispatch slot 0) is used */
+   dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
+         0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
+         0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
+
+   /*
+    * Shift in unsigned arithmetic.  max_threads is a signed int, and
+    * shifting it into the upper bits of the dword as an int can overflow,
+    * which is undefined behavior in C.
+    */
+   dw5 = (uint32_t) (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+    *      PS kernel or color calculator has the ability to kill (discard)
+    *      pixels or samples, other than due to depth or stencil testing.
+    *      This bit is required to be ENABLED in the following situations:
+    *
+    *      The API pixel shader program contains "killpix" or "discard"
+    *      instructions, or other code in the pixel shader kernel that can
+    *      cause the final pixel mask to differ from the pixel mask received
+    *      on dispatch.
+    *
+    *      A sampler with chroma key enabled with kill pixel mode is used by
+    *      the pixel shader.
+    *
+    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
+    *      enabled.
+    *
+    *      The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware and
+    *      therefore not via PS instructions, there should be no need to
+    *      ENABLE this bit due to ClipDistance clipping."
+    */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+    *      field must be set to disabled."
+    *
+    * TODO This is not checked yet.
+    */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+      dw5 |= GEN6_WM_DW5_PS_USE_W;
+
+   /*
+    * TODO set this bit only when
+    *
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
+    */
+   if (true)
+      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+   /* only the 8-wide dispatch mode is programmed here */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
+
+   dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
+         GEN6_WM_DW6_PS_POSOFFSET_NONE |
+         interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = dw6;
+}
+
+/**
+ * Compute the fragment-shader-derived 3DSTATE_WM bits for GEN7 and GEN7.5.
+ *
+ * The value is assembled from GEN7_WM_DW1_* fields according to the kernel
+ * parameters of \p fs.
+ */
+static uint32_t
+fs_get_wm_gen7(const struct ilo_dev *dev,
+               const struct ilo_shader_state *fs)
+{
+   uint32_t wm_bits;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   /* barycentric interpolation modes required by the kernel */
+   wm_bits = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
+      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
+
+   /*
+    * TODO set this bit only when
+    *
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
+    */
+   wm_bits |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
+    *      the PS kernel or color calculator has the ability to kill
+    *      (discard) pixels or samples, other than due to depth or stencil
+    *      testing. This bit is required to be ENABLED in the following
+    *      situations:
+    *
+    *      - The API pixel shader program contains "killpix" or "discard"
+    *        instructions, or other code in the pixel shader kernel that
+    *        can cause the final pixel mask to differ from the pixel mask
+    *        received on dispatch.
+    *
+    *      - A sampler with chroma key enabled with kill pixel mode is used
+    *        by the pixel shader.
+    *
+    *      - Any render target has Alpha Test Enable or AlphaToCoverage
+    *        Enable enabled.
+    *
+    *      - The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware
+    *      and therefore not via PS instructions, there should be no need
+    *      to ENABLE this bit due to ClipDistance clipping."
+    */
+   wm_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL) ?
+      GEN7_WM_DW1_PS_KILL_PIXEL : 0;
+
+   /* the kernel writes its own depth */
+   wm_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z) ?
+      GEN7_WM_DW1_PSCDEPTH_ON : 0;
+
+   /* the kernel reads the source depth and/or W */
+   wm_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z) ?
+      GEN7_WM_DW1_PS_USE_DEPTH : 0;
+   wm_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W) ?
+      GEN7_WM_DW1_PS_USE_W : 0;
+
+   return wm_bits;
+}
+
+/**
+ * Initialize the fragment shader CSO for GEN7 and GEN7.5.
+ *
+ * cso->payload[0..2] receive the values named dw2, dw4, and dw5 below, built
+ * from GEN6_THREADDISP_*, GEN7_PS_*, and GEN75_PS_* fields.  cso->payload[3]
+ * receives the shader-derived 3DSTATE_WM bits from fs_get_wm_gen7().
+ */
+static void
+fs_init_cso_gen7(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   int start_grf, sampler_count, max_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /* the alternate FP mode is never selected; samplers are counted in 4s */
+   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
+
+   /*
+    * see brwCreateContext()
+    *
+    * The thread counts are shifted in unsigned arithmetic.  max_threads is a
+    * signed int, and shifting it into the upper bits of the dword as an int
+    * can overflow, which is undefined behavior in C.
+    */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(7.5):
+      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
+      dw4 |= (uint32_t) (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
+      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+      break;
+   case ILO_GEN(7):
+   default:
+      max_threads = (dev->gt == 2) ? 172 : 48;
+      dw4 |= (uint32_t) (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+      break;
+   }
+
+   /* push constants are enabled when the kernel reports a PCB cbuf0 size */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
+      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
+
+   /* attributes are enabled when the kernel has any input */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
+      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
+
+   /* only the 8-wide dispatch mode is programmed here */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
+
+   /* only the first kernel (dispatch slot 0) is used */
+   dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
+         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
+         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = fs_get_wm_gen7(dev, fs);
+}
+
+/**
+ * Compute the fragment-shader-derived GEN8_PSX_DW1_* bits for GEN8 from the
+ * kernel parameters of \p fs.
+ */
+static uint32_t
+fs_get_psx_gen8(const struct ilo_dev *dev,
+                const struct ilo_shader_state *fs)
+{
+   uint32_t psx_bits;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* dispatch is always enabled */
+   psx_bits = GEN8_PSX_DW1_DISPATCH_ENABLE;
+
+   /* each remaining bit mirrors one kernel parameter being non-zero */
+   psx_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL) ?
+      GEN8_PSX_DW1_KILL_PIXEL : 0;
+   psx_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z) ?
+      GEN8_PSX_DW1_PSCDEPTH_ON : 0;
+   psx_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z) ?
+      GEN8_PSX_DW1_USE_DEPTH : 0;
+   psx_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W) ?
+      GEN8_PSX_DW1_USE_W : 0;
+   psx_bits |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT) ?
+      GEN8_PSX_DW1_ATTR_ENABLE : 0;
+
+   return psx_bits;
+}
+
+/**
+ * Compute the fragment-shader-derived 3DSTATE_WM bits for GEN8, which consist
+ * of the barycentric interpolation modes only.
+ */
+static uint32_t
+fs_get_wm_gen8(const struct ilo_dev *dev,
+               const struct ilo_shader_state *fs)
+{
+   uint32_t wm_bits;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   wm_bits = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
+      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
+
+   return wm_bits;
+}
+
+/**
+ * Initialize the fragment shader CSO for GEN8.
+ *
+ * cso->payload[0..2] receive three dwords built from GEN6_THREADDISP_* and
+ * GEN8_PS_DW6_* / GEN8_PS_DW7_* fields.  cso->payload[3] and cso->payload[4]
+ * receive the values of fs_get_psx_gen8() and fs_get_wm_gen8().
+ */
+static void
+fs_init_cso_gen8(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   uint32_t thread_disp, ps_dw6, ps_dw7;
+   int grf_start, num_samplers;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /*
+    * GEN6_THREADDISP_FP_MODE_ALT is never set; only the sampler count,
+    * in units of four samplers, goes into this dword.
+    */
+   thread_disp = ((num_samplers + 3) / 4) <<
+      GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   /*
+    * always 64?
+    *
+    * NOTE(review): the thread count is programmed as (64 - 2); confirm the
+    * bias of this field against the Broadwell PRM.
+    */
+   ps_dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT;
+   ps_dw6 |= GEN8_PS_DW6_POSOFFSET_NONE;
+
+   /* push constants are enabled when the kernel reports a PCB cbuf0 size */
+   ps_dw6 |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE) ?
+      GEN8_PS_DW6_PUSH_CONSTANT_ENABLE : 0;
+
+   /* only the 8-wide dispatch mode is programmed here */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   ps_dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
+
+   /* only the first kernel (dispatch slot 0) is used */
+   ps_dw7 = grf_start << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
+            0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
+            0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 5);
+   cso->payload[0] = thread_disp;
+   cso->payload[1] = ps_dw6;
+   cso->payload[2] = ps_dw7;
+   cso->payload[3] = fs_get_psx_gen8(dev, fs);
+   cso->payload[4] = fs_get_wm_gen8(dev, fs);
+}
+
+/**
+ * Initialize the fragment shader CSO \p cso from \p fs, dispatching to the
+ * initializer that matches the generation of \p dev.
+ */
+void
+ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *fs,
+                    struct ilo_shader_cso *cso)
+{
+   /* anything older than GEN7 takes the GEN6 path */
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      fs_init_cso_gen6(dev, fs, cso);
+      return;
+   }
+
+   /* GEN7 and GEN7.5 */
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      fs_init_cso_gen7(dev, fs, cso);
+      return;
+   }
+
+   fs_init_cso_gen8(dev, fs, cso);
+}
+
+/**
+ * Intermediate description of a depth/stencil surface.  It is filled by
+ * zs_init_info() or zs_init_info_null() and then packed into hardware dwords
+ * by ilo_gpe_init_zs_surface().
+ */
+struct ilo_zs_surface_info {
+   /* hardware surface type (GEN6_SURFTYPE_x) */
+   int surface_type;
+   /* hardware depth format (GEN6_ZFORMAT_x) */
+   int format;
+
+   /*
+    * Per-buffer parameters for the depth buffer (zs), the separate stencil
+    * buffer, and the HiZ buffer.  A NULL bo means the buffer is not used.
+    */
+   struct {
+      struct intel_bo *bo;
+      unsigned stride;
+      /* layer height divided by 4 */
+      unsigned qpitch;
+      enum gen_surface_tiling tiling;
+      uint32_t offset;
+   } zs, stencil, hiz;
+
+   /* surface dimensions */
+   unsigned width, height, depth;
+   /* selected mip level and layer range */
+   unsigned lod, first_layer, num_layers;
+};
+
+/**
+ * Describe a null depth/stencil surface: no buffers, a 1x1x1 extent, and a
+ * single layer.
+ */
+static void
+zs_init_info_null(const struct ilo_dev *dev,
+                  struct ilo_zs_surface_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* start from all zeros: no BOs, strides, offsets, LOD, or first layer */
+   memset(info, 0, sizeof(*info));
+
+   /* minimal non-zero extent */
+   info->width = 1;
+   info->height = 1;
+   info->depth = 1;
+   info->num_layers = 1;
+
+   info->surface_type = GEN6_SURFTYPE_NULL;
+   info->format = GEN6_ZFORMAT_D32_FLOAT;
+}
+
+/**
+ * Describe the depth/stencil view of \p tex selected by \p format, \p level,
+ * and the layer range [\p first_layer, \p first_layer + \p num_layers).
+ *
+ * On an unsupported format, \p info is set up as a null surface instead.
+ */
+static void
+zs_init_info(const struct ilo_dev *dev,
+             const struct ilo_texture *tex,
+             enum pipe_format format, unsigned level,
+             unsigned first_layer, unsigned num_layers,
+             struct ilo_zs_surface_info *info)
+{
+   bool separate_stencil;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   memset(info, 0, sizeof(*info));
+
+   info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+
+   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
+       *
+       *     "For Other Surfaces (Cube Surfaces):
+       *      This field (Minimum Array Element) is ignored."
+       *
+       *     "For Other Surfaces (Cube Surfaces):
+       *      This field (Render Target View Extent) is ignored."
+       *
+       * As such, we cannot set first_layer and num_layers on cube surfaces.
+       * To work around that, treat it as a 2D surface.
+       */
+      info->surface_type = GEN6_SURFTYPE_2D;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      separate_stencil = true;
+   }
+   else {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+       *
+       *     "This field (Separate Stencil Buffer Enable) must be set to the
+       *      same value (enabled or disabled) as Hierarchical Depth Buffer
+       *      Enable."
+       */
+      separate_stencil =
+         ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+    *
+    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
+    *      Surface Format of the depth buffer cannot be
+    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
+    *      requires the separate stencil buffer."
+    *
+    * From the Ironlake PRM, volume 2 part 1, page 330:
+    *
+    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
+    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+    *
+    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
+    * is indeed used, the depth values output by the fragment shaders will
+    * be different when read back.
+    *
+    * As for GEN7+, separate_stencil is always true.
+    */
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      info->format = GEN6_ZFORMAT_D16_UNORM;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      info->format = GEN6_ZFORMAT_D32_FLOAT;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      info->format = (separate_stencil) ?
+         GEN6_ZFORMAT_D24_UNORM_X8_UINT :
+         GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      info->format = (separate_stencil) ?
+         GEN6_ZFORMAT_D32_FLOAT :
+         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+      break;
+   case PIPE_FORMAT_S8_UINT:
+      /* a stencil-only view is valid only with a separate stencil buffer */
+      if (separate_stencil) {
+         info->format = GEN6_ZFORMAT_D32_FLOAT;
+         break;
+      }
+      /* fall through */
+   default:
+      assert(!"unsupported depth/stencil format");
+      zs_init_info_null(dev, info);
+      return;
+      break;
+   }
+
+   /* a stencil-only view has no depth buffer */
+   if (format != PIPE_FORMAT_S8_UINT) {
+      info->zs.bo = tex->image.bo;
+      info->zs.stride = tex->image.bo_stride;
+
+      assert(tex->image.layer_height % 4 == 0);
+      info->zs.qpitch = tex->image.layer_height / 4;
+
+      info->zs.tiling = tex->image.tiling;
+      info->zs.offset = 0;
+   }
+
+   /*
+    * The stencil buffer is either the separate S8 texture attached to tex,
+    * or tex itself when the view is stencil-only.
+    */
+   if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
+      const struct ilo_texture *s8_tex =
+         (tex->separate_s8) ? tex->separate_s8 : tex;
+
+      info->stencil.bo = s8_tex->image.bo;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
+       *
+       *     "The pitch must be set to 2x the value computed based on width,
+       *       as the stencil buffer is stored with two rows interleaved."
+       *
+       * For GEN7, we still double the stride because we did not double the
+       * slice widths when initializing the layout.
+       */
+      info->stencil.stride = s8_tex->image.bo_stride * 2;
+
+      assert(s8_tex->image.layer_height % 4 == 0);
+      info->stencil.qpitch = s8_tex->image.layer_height / 4;
+
+      info->stencil.tiling = s8_tex->image.tiling;
+
+      /* only GEN6 gets an explicit offset; it stays 0 otherwise */
+      if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+         unsigned x, y;
+
+         assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD);
+
+         /* offset to the level */
+         ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y);
+         ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y);
+         info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y);
+      }
+   }
+
+   /* the HiZ buffer comes from the auxiliary BO of the image */
+   if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) {
+      info->hiz.bo = tex->image.aux_bo;
+      info->hiz.stride = tex->image.aux_stride;
+
+      assert(tex->image.aux_layer_height % 4 == 0);
+      info->hiz.qpitch = tex->image.aux_layer_height / 4;
+
+      info->hiz.tiling = GEN6_TILING_Y;
+
+      /* offset to the level */
+      if (ilo_dev_gen(dev) == ILO_GEN(6))
+         info->hiz.offset = tex->image.aux_offsets[level];
+   }
+
+   info->width = tex->image.width0;
+   info->height = tex->image.height0;
+   /* 3D textures use their real depth; others use the layer count */
+   info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+      tex->base.depth0 : num_layers;
+
+   info->lod = level;
+   info->first_layer = first_layer;
+   info->num_layers = num_layers;
+}
+
+/**
+ * Initialize the depth/stencil surface state \p zs.
+ *
+ * When \p tex is NULL, a null surface is described.  Otherwise the view of
+ * \p tex selected by \p format, \p level, \p first_layer, and \p num_layers
+ * is described.
+ *
+ * zs->payload[0..5] hold the depth buffer dwords, zs->payload[6..8] the
+ * separate stencil buffer dwords, and zs->payload[9..11] the HiZ buffer
+ * dwords.  The BO pointers stored in \p zs are not referenced.
+ */
+void
+ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
+                        const struct ilo_texture *tex,
+                        enum pipe_format format, unsigned level,
+                        unsigned first_layer, unsigned num_layers,
+                        struct ilo_zs_surface *zs)
+{
+   const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+   const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
+   struct ilo_zs_surface_info info;
+   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
+   int align_w = 8, align_h = 4;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (tex) {
+      zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
+
+      /* shrink the 8x4 alignment according to the sample count */
+      switch (tex->base.nr_samples) {
+      case 2:
+         align_w /= 2;
+         break;
+      case 4:
+         align_w /= 2;
+         align_h /= 2;
+         break;
+      case 8:
+         align_w /= 4;
+         align_h /= 2;
+         break;
+      case 16:
+         align_w /= 4;
+         align_h /= 4;
+         break;
+      default:
+         break;
+      }
+   } else {
+      zs_init_info_null(dev, &info);
+   }
+
+   /* sanity-check the extent against the per-type limits */
+   switch (info.surface_type) {
+   case GEN6_SURFTYPE_NULL:
+      break;
+   case GEN6_SURFTYPE_1D:
+      assert(info.width <= max_2d_size && info.height == 1 &&
+             info.depth <= max_array_size);
+      assert(info.first_layer < max_array_size - 1 &&
+             info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_2D:
+      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+             info.depth <= max_array_size);
+      assert(info.first_layer < max_array_size - 1 &&
+             info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_3D:
+      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
+      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+             info.depth == 1);
+      assert(info.first_layer == 0 && info.num_layers == 1);
+      assert(info.width == info.height);
+      break;
+   default:
+      assert(!"unexpected depth surface type");
+      break;
+   }
+
+   /*
+    * surface_type and format are signed ints.  Shift them in unsigned
+    * arithmetic: shifting a signed value into the top bits of the dword
+    * overflows int, which is undefined behavior in C.
+    */
+   dw1 = (uint32_t) info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+         (uint32_t) info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+
+   if (info.zs.bo) {
+      /* required for GEN6+ */
+      assert(info.zs.tiling == GEN6_TILING_Y);
+      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
+            info.zs.stride % 128 == 0);
+      assert(info.width <= info.zs.stride);
+
+      dw1 |= (info.zs.stride - 1);
+      dw2 = info.zs.offset;
+   } else {
+      dw2 = 0;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      if (info.zs.bo)
+         dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
+
+      if (info.stencil.bo)
+         dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
+
+      if (info.hiz.bo)
+         dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
+
+      dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+            (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+            info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
+
+      /* same layout as dw3, with the extent aligned to align_w x align_h */
+      zs->dw_aligned_8x4 =
+         (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+         (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+         info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
+
+      dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
+            info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
+
+      dw5 = 0;
+
+      dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         dw6 |= info.zs.qpitch;
+   } else {
+      /* always Y-tiled */
+      dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
+
+      /* on GEN6, HiZ and separate stencil are enabled together */
+      if (info.hiz.bo) {
+         dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
+                GEN6_DEPTH_DW1_SEPARATE_STENCIL;
+      }
+
+      dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+            (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+            info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
+            GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+
+      /* same layout as dw3, with the extent aligned to align_w x align_h */
+      zs->dw_aligned_8x4 =
+         (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+         (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+         info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
+         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+
+      dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
+            info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+            (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
+
+      dw5 = 0;
+
+      dw6 = 0;
+   }
+
+   STATIC_ASSERT(Elements(zs->payload) >= 12);
+
+   zs->payload[0] = dw1;
+   zs->payload[1] = dw2;
+   zs->payload[2] = dw3;
+   zs->payload[3] = dw4;
+   zs->payload[4] = dw5;
+   zs->payload[5] = dw6;
+
+   /* do not increment reference count */
+   zs->bo = info.zs.bo;
+
+   /* separate stencil; dw1, dw2, and dw4 are reused from here on */
+   if (info.stencil.bo) {
+      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
+             info.stencil.stride % 128 == 0);
+
+      dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
+      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+         dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
+
+      dw2 = info.stencil.offset;
+      dw4 = info.stencil.qpitch;
+   } else {
+      dw1 = 0;
+      dw2 = 0;
+      dw4 = 0;
+   }
+
+   zs->payload[6] = dw1;
+   zs->payload[7] = dw2;
+   zs->payload[8] = dw4;
+   /* do not increment reference count */
+   zs->separate_s8_bo = info.stencil.bo;
+
+   /* hiz */
+   if (info.hiz.bo) {
+      dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
+      dw2 = info.hiz.offset;
+      dw4 = info.hiz.qpitch;
+   } else {
+      dw1 = 0;
+      dw2 = 0;
+      dw4 = 0;
+   }
+
+   zs->payload[9] = dw1;
+   zs->payload[10] = dw2;
+   zs->payload[11] = dw4;
+   /* do not increment reference count */
+   zs->hiz_bo = info.hiz.bo;
+}
+
+/**
+ * Clamp the center of a guardband of half-length \p half_len so that the
+ * whole guardband lies within [-max_extent, max_extent - 1].
+ */
+static int
+viewport_clamp_guardband_center(int center, int half_len, int max_extent)
+{
+   if (center - half_len < -max_extent)
+      return -max_extent + half_len;
+
+   /* the upper bound is inclusive and one less than max_extent */
+   if (center + half_len > max_extent - 1)
+      return max_extent - 1 - half_len;
+
+   return center;
+}
+
+/**
+ * Compute the screen-space guardband for a viewport centered at
+ * (\p center_x, \p center_y).
+ *
+ * The guardband is a square of length 8K centered at the viewport, moved as
+ * needed to stay within the extent supported by the device.  The bounds are
+ * returned in \p min_gbx, \p max_gbx, \p min_gby, and \p max_gby.
+ */
+static void
+viewport_get_guardband(const struct ilo_dev *dev,
+                       int center_x, int center_y,
+                       int *min_gbx, int *max_gbx,
+                       int *min_gby, int *max_gby)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): 16K"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): N/A"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * Combined, the bounding box of any object can not exceed 8K in both
+    * width and height.
+    *
+    * Below we set the guardband as a square of length 8K, centered at where
+    * the viewport is.  This makes sure all objects passing the GB test are
+    * valid to the renderer, and those failing the XY clipping have a
+    * better chance of passing the GB test.
+    */
+   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
+   const int half_len = 8192 / 2;
+
+   /* make sure the guardband is within the valid range */
+   center_x = viewport_clamp_guardband_center(center_x, half_len, max_extent);
+   center_y = viewport_clamp_guardband_center(center_y, half_len, max_extent);
+
+   *min_gbx = center_x - half_len;
+   *max_gbx = center_x + half_len;
+   *min_gby = center_y - half_len;
+   *max_gby = center_y + half_len;
+}
+
+/**
+ * Fill the viewport CSO \p vp from the pipe viewport \p state.
+ *
+ * Three things are stored: the scale/translate terms of the viewport
+ * transform, the guardband converted to NDC space, and the viewport bounds
+ * in screen space.
+ */
+void
+ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
+                         const struct pipe_viewport_state *state,
+                         struct ilo_viewport_cso *vp)
+{
+   /* magnitudes of the scales; the signs are kept only in the matrix */
+   const float abs_sx = fabs(state->scale[0]);
+   const float abs_sy = fabs(state->scale[1]);
+   const float abs_sz = fabs(state->scale[2]);
+   const float trans_x = state->translate[0];
+   const float trans_y = state->translate[1];
+   const float trans_z = state->translate[2];
+   int gb_x0, gb_x1, gb_y0, gb_y1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* the guardband is centered at the truncated viewport translation */
+   viewport_get_guardband(dev, (int) trans_x, (int) trans_y,
+         &gb_x0, &gb_x1, &gb_y0, &gb_y1);
+
+   /* matrix form */
+   vp->m00 = state->scale[0];
+   vp->m11 = state->scale[1];
+   vp->m22 = state->scale[2];
+   vp->m30 = trans_x;
+   vp->m31 = trans_y;
+   vp->m32 = trans_z;
+
+   /* guardband in NDC space */
+   vp->min_gbx = ((float) gb_x0 - trans_x) / abs_sx;
+   vp->max_gbx = ((float) gb_x1 - trans_x) / abs_sx;
+   vp->min_gby = ((float) gb_y0 - trans_y) / abs_sy;
+   vp->max_gby = ((float) gb_y1 - trans_y) / abs_sy;
+
+   /* viewport in screen space: translate -/+ |scale| on each axis */
+   vp->min_x = trans_x - abs_sx;
+   vp->max_x = trans_x + abs_sx;
+   vp->min_y = trans_y - abs_sy;
+   vp->max_y = trans_y + abs_sy;
+   vp->min_z = trans_z - abs_sz;
+   vp->max_z = trans_z + abs_sz;
+}
+
+/**
+ * Map a PIPE_LOGICOP_x value to the corresponding GEN6_LOGICOP_x value.
+ *
+ * An unknown value asserts and maps to GEN6_LOGICOP_CLEAR.
+ */
+static int
+gen6_translate_pipe_logicop(unsigned logicop)
+{
+   int hw_logicop;
+
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:
+      hw_logicop = GEN6_LOGICOP_CLEAR;
+      break;
+   case PIPE_LOGICOP_NOR:
+      hw_logicop = GEN6_LOGICOP_NOR;
+      break;
+   case PIPE_LOGICOP_AND_INVERTED:
+      hw_logicop = GEN6_LOGICOP_AND_INVERTED;
+      break;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      hw_logicop = GEN6_LOGICOP_COPY_INVERTED;
+      break;
+   case PIPE_LOGICOP_AND_REVERSE:
+      hw_logicop = GEN6_LOGICOP_AND_REVERSE;
+      break;
+   case PIPE_LOGICOP_INVERT:
+      hw_logicop = GEN6_LOGICOP_INVERT;
+      break;
+   case PIPE_LOGICOP_XOR:
+      hw_logicop = GEN6_LOGICOP_XOR;
+      break;
+   case PIPE_LOGICOP_NAND:
+      hw_logicop = GEN6_LOGICOP_NAND;
+      break;
+   case PIPE_LOGICOP_AND:
+      hw_logicop = GEN6_LOGICOP_AND;
+      break;
+   case PIPE_LOGICOP_EQUIV:
+      hw_logicop = GEN6_LOGICOP_EQUIV;
+      break;
+   case PIPE_LOGICOP_NOOP:
+      hw_logicop = GEN6_LOGICOP_NOOP;
+      break;
+   case PIPE_LOGICOP_OR_INVERTED:
+      hw_logicop = GEN6_LOGICOP_OR_INVERTED;
+      break;
+   case PIPE_LOGICOP_COPY:
+      hw_logicop = GEN6_LOGICOP_COPY;
+      break;
+   case PIPE_LOGICOP_OR_REVERSE:
+      hw_logicop = GEN6_LOGICOP_OR_REVERSE;
+      break;
+   case PIPE_LOGICOP_OR:
+      hw_logicop = GEN6_LOGICOP_OR;
+      break;
+   case PIPE_LOGICOP_SET:
+      hw_logicop = GEN6_LOGICOP_SET;
+      break;
+   default:
+      assert(!"unknown logicop function");
+      hw_logicop = GEN6_LOGICOP_CLEAR;
+      break;
+   }
+
+   return hw_logicop;
+}
+
+/**
+ * Map a PIPE_BLEND_x value to the corresponding GEN6_BLENDFUNCTION_x value.
+ *
+ * An unknown value asserts and maps to GEN6_BLENDFUNCTION_ADD.
+ */
+static int
+gen6_translate_pipe_blend(unsigned blend)
+{
+   int hw_func;
+
+   switch (blend) {
+   case PIPE_BLEND_ADD:
+      hw_func = GEN6_BLENDFUNCTION_ADD;
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      hw_func = GEN6_BLENDFUNCTION_SUBTRACT;
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      hw_func = GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
+      break;
+   case PIPE_BLEND_MIN:
+      hw_func = GEN6_BLENDFUNCTION_MIN;
+      break;
+   case PIPE_BLEND_MAX:
+      hw_func = GEN6_BLENDFUNCTION_MAX;
+      break;
+   default:
+      assert(!"unknown blend function");
+      hw_func = GEN6_BLENDFUNCTION_ADD;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Map a PIPE_BLENDFACTOR_x value to the corresponding GEN6_BLENDFACTOR_x
+ * value.
+ *
+ * An unknown value asserts and maps to GEN6_BLENDFACTOR_ONE.
+ */
+static int
+gen6_translate_pipe_blendfactor(unsigned blendfactor)
+{
+   int hw_factor;
+
+   switch (blendfactor) {
+   case PIPE_BLENDFACTOR_ONE:
+      hw_factor = GEN6_BLENDFACTOR_ONE;
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_SRC1_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      hw_factor = GEN6_BLENDFACTOR_ZERO;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_INV_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_INV_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_INV_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_INV_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_INV_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_INV_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+      break;
+   default:
+      assert(!"unknown blend factor");
+      hw_factor = GEN6_BLENDFACTOR_ONE;
+      break;
+   }
+
+   return hw_factor;
+}
+
+/**
+ * Map a PIPE_STENCIL_OP_x value to the corresponding GEN6_STENCILOP_x value.
+ *
+ * Note the naming difference: the pipe INCR/DECR ops map to the saturating
+ * hardware ops, and the pipe INCR_WRAP/DECR_WRAP ops map to the hardware
+ * INCR/DECR ops.  An unknown value asserts and maps to GEN6_STENCILOP_KEEP.
+ */
+static int
+gen6_translate_pipe_stencil_op(unsigned stencil_op)
+{
+   int hw_op;
+
+   switch (stencil_op) {
+   case PIPE_STENCIL_OP_KEEP:
+      hw_op = GEN6_STENCILOP_KEEP;
+      break;
+   case PIPE_STENCIL_OP_ZERO:
+      hw_op = GEN6_STENCILOP_ZERO;
+      break;
+   case PIPE_STENCIL_OP_REPLACE:
+      hw_op = GEN6_STENCILOP_REPLACE;
+      break;
+   case PIPE_STENCIL_OP_INCR:
+      hw_op = GEN6_STENCILOP_INCRSAT;
+      break;
+   case PIPE_STENCIL_OP_DECR:
+      hw_op = GEN6_STENCILOP_DECRSAT;
+      break;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      hw_op = GEN6_STENCILOP_INCR;
+      break;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      hw_op = GEN6_STENCILOP_DECR;
+      break;
+   case PIPE_STENCIL_OP_INVERT:
+      hw_op = GEN6_STENCILOP_INVERT;
+      break;
+   default:
+      assert(!"unknown stencil op");
+      hw_op = GEN6_STENCILOP_KEEP;
+      break;
+   }
+
+   return hw_op;
+}
+
+/**
+ * Rewrite a hardware blend factor for the case where the destination alpha
+ * is forced to one.
+ *
+ * DST_ALPHA becomes ONE; INV_DST_ALPHA and SRC_ALPHA_SATURATE become ZERO.
+ * Every other factor is returned unchanged.
+ */
+static int
+gen6_blend_factor_dst_alpha_forced_one(int factor)
+{
+   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
+      return GEN6_BLENDFACTOR_ONE;
+
+   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA ||
+       factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
+      return GEN6_BLENDFACTOR_ZERO;
+
+   return factor;
+}
+
+/**
+ * Compute the blend-enable dword of a render target for GEN6 through GEN7.5.
+ *
+ * Returns 0 when blending is disabled for \p rt.  Otherwise the dword holds
+ * the blend enable bit, the translated blend functions and factors, and the
+ * independent-alpha bit when the RGB and alpha setups differ.  When
+ * \p dst_alpha_forced_one is set, factors that read the destination alpha
+ * are first replaced by their constant equivalents.
+ */
+static uint32_t
+blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
+                               const struct pipe_rt_blend_state *rt,
+                               bool dst_alpha_forced_one)
+{
+   int src_rgb, dst_rgb, src_a, dst_a;
+   bool same_rgb_and_alpha;
+   uint32_t rt_dw;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (!rt->blend_enable)
+      return 0;
+
+   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
+   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
+   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
+   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
+
+   if (dst_alpha_forced_one) {
+      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
+      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
+      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
+      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
+   }
+
+   /* pack the fields from the most significant one down */
+   rt_dw = GEN6_RT_DW0_BLEND_ENABLE;
+   rt_dw |= gen6_translate_pipe_blend(rt->alpha_func) << 26;
+   rt_dw |= src_a << 20;
+   rt_dw |= dst_a << 15;
+   rt_dw |= gen6_translate_pipe_blend(rt->rgb_func) << 11;
+   rt_dw |= src_rgb << 5;
+   rt_dw |= dst_rgb;
+
+   /* the factors are compared after the forced-one substitution */
+   same_rgb_and_alpha = (rt->rgb_func == rt->alpha_func &&
+                         src_rgb == src_a && dst_rgb == dst_a);
+   if (!same_rgb_and_alpha)
+      rt_dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+   return rt_dw;
+}
+
+/**
+ * Build the blend enable, blend function and blend factor bits of one render
+ * target for Gen8.
+ *
+ * Always writes *independent_alpha: false when blending is disabled (the
+ * function then returns 0), otherwise true iff the alpha function or factors
+ * differ from the color ones.  When dst_alpha_forced_one is true, every
+ * translated factor is first passed through
+ * gen6_blend_factor_dst_alpha_forced_one().
+ */
+static uint32_t
+blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
+                               const struct pipe_rt_blend_state *rt,
+                               bool dst_alpha_forced_one,
+                               bool *independent_alpha)
+{
+   int src_rgb, dst_rgb, src_a, dst_a;
+   uint32_t bits;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!rt->blend_enable) {
+      *independent_alpha = false;
+      return 0;
+   }
+
+   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
+   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
+   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
+   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
+
+   if (dst_alpha_forced_one) {
+      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
+      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
+      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
+      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
+   }
+
+   /* on Gen8 the color fields sit above the alpha fields */
+   bits = GEN8_RT_DW0_BLEND_ENABLE;
+   bits |= src_rgb << 26;
+   bits |= dst_rgb << 21;
+   bits |= gen6_translate_pipe_blend(rt->rgb_func) << 18;
+   bits |= src_a << 13;
+   bits |= dst_a << 8;
+   bits |= gen6_translate_pipe_blend(rt->alpha_func) << 5;
+
+   *independent_alpha = !(rt->rgb_func == rt->alpha_func &&
+                          src_rgb == src_a &&
+                          dst_rgb == dst_a);
+
+   return bits;
+}
+
+/**
+ * Initialize blend->cso[index] from state->rt[index] for Gen6 to Gen7.5.
+ *
+ * payload[0] is cleared, payload[1] receives the clamp bits plus a write
+ * disable bit for every channel missing from the colormask, and the two
+ * dw_blend variants (normal and destination-alpha-forced-to-one) are filled
+ * in unless logic ops are enabled.
+ */
+static void
+blend_init_cso_gen6(const struct ilo_dev *dev,
+                    const struct pipe_blend_state *state,
+                    struct ilo_blend_state *blend,
+                    unsigned index)
+{
+   const struct pipe_rt_blend_state *rt = &state->rt[index];
+   struct ilo_blend_cso *cso = &blend->cso[index];
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
+         GEN6_RT_DW1_PRE_BLEND_CLAMP |
+         GEN6_RT_DW1_POST_BLEND_CLAMP;
+
+   /* a channel absent from the colormask has its writes disabled */
+   if (!(rt->colormask & PIPE_MASK_A))
+      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_A;
+   if (!(rt->colormask & PIPE_MASK_R))
+      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_R;
+   if (!(rt->colormask & PIPE_MASK_G))
+      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_G;
+   if (!(rt->colormask & PIPE_MASK_B))
+      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_B;
+
+   cso->payload[0] = 0;
+   cso->payload[1] = dw1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Color Buffer Blending and Logic Ops must not be enabled
+    *      simultaneously, or behavior is UNDEFINED."
+    *
+    * Since state->logicop_enable takes precedence over rt->blend_enable,
+    * no special care is needed.
+    */
+   if (!state->logicop_enable) {
+      cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
+      cso->dw_blend_dst_alpha_forced_one =
+         blend_get_rt_blend_enable_gen6(dev, rt, true);
+   } else {
+      cso->dw_blend = 0;
+      cso->dw_blend_dst_alpha_forced_one = 0;
+   }
+}
+
+/**
+ * Initialize blend->cso[index] from state->rt[index] for Gen8.
+ *
+ * payload[0] receives a write disable bit for every channel missing from the
+ * colormask and payload[1] receives the clamp bits.  The two dw_blend
+ * variants are filled in unless logic ops are enabled.
+ *
+ * Returns true when either dw_blend variant needs independent alpha
+ * blending; always false when logic ops are enabled.
+ */
+static bool
+blend_init_cso_gen8(const struct ilo_dev *dev,
+                    const struct pipe_blend_state *state,
+                    struct ilo_blend_state *blend,
+                    unsigned index)
+{
+   const struct pipe_rt_blend_state *rt = &state->rt[index];
+   struct ilo_blend_cso *cso = &blend->cso[index];
+   bool indep_normal = false;
+   bool indep_forced_one = false;
+   uint32_t dw0 = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* a channel absent from the colormask has its writes disabled */
+   if (!(rt->colormask & PIPE_MASK_A))
+      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_A;
+   if (!(rt->colormask & PIPE_MASK_R))
+      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_R;
+   if (!(rt->colormask & PIPE_MASK_G))
+      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_G;
+   if (!(rt->colormask & PIPE_MASK_B))
+      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_B;
+
+   cso->payload[0] = dw0;
+   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
+                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
+                     GEN8_RT_DW1_POST_BLEND_CLAMP;
+
+   /* logic ops take precedence over blending */
+   if (!state->logicop_enable) {
+      cso->dw_blend =
+         blend_get_rt_blend_enable_gen8(dev, rt, false, &indep_normal);
+      cso->dw_blend_dst_alpha_forced_one =
+         blend_get_rt_blend_enable_gen8(dev, rt, true, &indep_forced_one);
+   } else {
+      cso->dw_blend = 0;
+      cso->dw_blend_dst_alpha_forced_one = 0;
+   }
+
+   return (indep_normal || indep_forced_one);
+}
+
+/**
+ * Return the logic op enable bit and the translated logic op function for
+ * Gen6 to Gen7.5, or 0 when logic ops are disabled.
+ */
+static uint32_t
+blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
+                              const struct pipe_blend_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (state->logicop_enable) {
+      dw = GEN6_RT_DW1_LOGICOP_ENABLE |
+           gen6_translate_pipe_logicop(state->logicop_func) << 18;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the logic op enable bit and the translated logic op function for
+ * Gen8, or 0 when logic ops are disabled.
+ */
+static uint32_t
+blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
+                              const struct pipe_blend_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->logicop_enable) {
+      dw = GEN8_RT_DW1_LOGICOP_ENABLE |
+           gen6_translate_pipe_logicop(state->logicop_func) << 27;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the alpha-to-coverage and alpha-to-one bits for Gen6 to Gen7.5.
+ *
+ * The alpha-to-coverage dither bit is only added on Gen7 and later, and
+ * alpha-to-one is suppressed when dual_blend is true.
+ */
+static uint32_t
+blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
+                         const struct pipe_blend_state *state,
+                         bool dual_blend)
+{
+   uint32_t bits = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+    *
+    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
+    *      must be disabled."
+    */
+   if (!dual_blend && state->alpha_to_one)
+      bits |= GEN6_RT_DW1_ALPHA_TO_ONE;
+
+   if (!state->alpha_to_coverage)
+      return bits;
+
+   bits |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      bits |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
+
+   return bits;
+}
+
+/**
+ * Return the alpha-to-coverage and alpha-to-one bits for Gen8.
+ *
+ * Alpha-to-coverage always comes with its dither bit, and alpha-to-one is
+ * suppressed when dual_blend is true.
+ */
+static uint32_t
+blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
+                         const struct pipe_blend_state *state,
+                         bool dual_blend)
+{
+   const uint32_t a2c_bits = GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
+                             GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
+   uint32_t bits;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   bits = (state->alpha_to_coverage) ? a2c_bits : 0;
+
+   if (!dual_blend && state->alpha_to_one)
+      bits |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
+
+   return bits;
+}
+
+/**
+ * Derive the PS_BLEND dword from an already-built Gen8 render target DW0.
+ *
+ * Returns 0 when rt_dw0 does not have blending enabled.  Otherwise the four
+ * blend factors are copied from their GEN8_RT_DW0 fields into the matching
+ * GEN8_PS_BLEND_DW1 fields, and independent alpha is flagged when the alpha
+ * factors differ from the color factors.  Only the factors are compared
+ * here, not the blend functions.
+ */
+static uint32_t
+blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
+{
+   int src_rgb, dst_rgb, src_a, dst_a;
+   uint32_t ps_dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if ((rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE) == 0)
+      return 0;
+
+   src_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
+   dst_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
+   src_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
+   dst_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
+
+   ps_dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE |
+           GEN_SHIFT32(src_a, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR) |
+           GEN_SHIFT32(dst_a, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR) |
+           GEN_SHIFT32(src_rgb, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR) |
+           GEN_SHIFT32(dst_rgb, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);
+
+   if (!(src_a == src_rgb && dst_a == dst_rgb))
+      ps_dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
+
+   return ps_dw;
+}
+
+/**
+ * Initialize the driver's blend state from a pipe blend state.
+ *
+ * The shared dwords (alpha modifiers, logic op, dither) and the first render
+ * target are set up per hardware generation.  The remaining render targets
+ * are either initialized individually, when independent blending is enabled,
+ * or copied from the first one.  The PS_BLEND dwords are only meaningful on
+ * Gen8 and are zeroed on earlier generations.
+ */
+void
+ilo_gpe_init_blend(const struct ilo_dev *dev,
+                   const struct pipe_blend_state *state,
+                   struct ilo_blend_state *blend)
+{
+   bool is_gen8;
+   unsigned rt;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   is_gen8 = (ilo_dev_gen(dev) >= ILO_GEN(8));
+
+   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
+                        state->rt[0].blend_enable &&
+                        !state->logicop_enable);
+   blend->alpha_to_coverage = state->alpha_to_coverage;
+
+   /* shared dwords and the first render target */
+   if (is_gen8) {
+      blend->dw_alpha_mod =
+         blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
+      blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
+      blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;
+
+      if (blend_init_cso_gen8(dev, state, blend, 0))
+         blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+      /* PS_BLEND mirrors the blend setup of the first render target */
+      blend->dw_ps_blend = blend_get_ps_blend_gen8(dev,
+            blend->cso[0].dw_blend);
+      blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev,
+            blend->cso[0].dw_blend_dst_alpha_forced_one);
+   } else {
+      blend->dw_alpha_mod =
+         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
+      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
+      blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;
+
+      blend->dw_ps_blend = 0;
+      blend->dw_ps_blend_dst_alpha_forced_one = 0;
+
+      blend_init_cso_gen6(dev, state, blend, 0);
+   }
+
+   /* the remaining render targets */
+   for (rt = 1; rt < Elements(blend->cso); rt++) {
+      if (!state->independent_blend_enable) {
+         blend->cso[rt] = blend->cso[0];
+         continue;
+      }
+
+      if (is_gen8) {
+         if (blend_init_cso_gen8(dev, state, blend, rt))
+            blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+      } else {
+         blend_init_cso_gen6(dev, state, blend, rt);
+      }
+   }
+}
+
+/**
+ * Map a PIPE_FUNC_x test function (as used by the depth, stencil and alpha
+ * tests) to the corresponding GEN6_COMPAREFUNCTION_x value.
+ *
+ * An unrecognized function trips an assertion and otherwise yields
+ * GEN6_COMPAREFUNCTION_NEVER.
+ */
+static int
+gen6_translate_dsa_func(unsigned func)
+{
+   int hw_func;
+
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   case PIPE_FUNC_LESS:
+      hw_func = GEN6_COMPAREFUNCTION_LESS;
+      break;
+   case PIPE_FUNC_EQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
+      break;
+   case PIPE_FUNC_GREATER:
+      hw_func = GEN6_COMPAREFUNCTION_GREATER;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
+      break;
+   default:
+      assert(!"unknown depth/stencil/alpha test function");
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Build the stencil test dword for Gen6 to Gen7.5.
+ *
+ * Returns 0 when the front stencil (stencil0) is disabled, regardless of
+ * stencil1.  The back-face fields are only filled in when stencil1 is
+ * enabled.  The single stencil write enable bit is set when either enabled
+ * face has a non-zero writemask.
+ */
+static uint32_t
+dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
+                            const struct pipe_stencil_state *stencil0,
+                            const struct pipe_stencil_state *stencil1)
+{
+   bool write_enable;
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (!stencil0->enabled)
+      return 0;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
+    *
+    *     "If the Depth Buffer is either undefined or does not have a surface
+    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
+    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
+    *
+    *     "This field (Stencil Test Enable) cannot be enabled if
+    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
+    *
+    * TODO We do not check these yet.
+    */
+   dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE;
+   dw |= gen6_translate_dsa_func(stencil0->func) << 28;
+   dw |= gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25;
+   dw |= gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22;
+   dw |= gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
+
+   write_enable = (stencil0->writemask != 0);
+
+   if (stencil1->enabled) {
+      dw |= GEN6_ZS_DW0_STENCIL1_ENABLE;
+      dw |= gen6_translate_dsa_func(stencil1->func) << 12;
+      dw |= gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9;
+      dw |= gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6;
+      dw |= gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
+
+      if (stencil1->writemask)
+         write_enable = true;
+   }
+
+   if (write_enable)
+      dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+
+   return dw;
+}
+
+/**
+ * Build the stencil test bits for Gen8.
+ *
+ * Returns 0 when the front stencil (stencil0) is disabled, regardless of
+ * stencil1.  The back-face fields are only filled in when stencil1 is
+ * enabled.  The single stencil write enable bit is set when either enabled
+ * face has a non-zero writemask.
+ *
+ * The translated ops and functions are ints.  They are converted to uint32_t
+ * before being shifted because the front-face fail op occupies the topmost
+ * field (shift of 29): shifting an int there would move an op with its high
+ * bit set into the sign bit, which is undefined behavior in C.
+ */
+static uint32_t
+dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
+                            const struct pipe_stencil_state *stencil0,
+                            const struct pipe_stencil_state *stencil1)
+{
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!stencil0->enabled)
+      return 0;
+
+   dw = (uint32_t) gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
+        (uint32_t) gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
+        (uint32_t) gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
+        (uint32_t) gen6_translate_dsa_func(stencil0->func) << 8 |
+        GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
+   if (stencil0->writemask)
+      dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+   if (stencil1->enabled) {
+      dw |= (uint32_t) gen6_translate_dsa_func(stencil1->func) << 20 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
+            GEN8_ZS_DW1_STENCIL1_ENABLE;
+      if (stencil1->writemask)
+         dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+   }
+
+   return dw;
+}
+
+/**
+ * Build the depth test dword for Gen6 to Gen7.5.
+ *
+ * When the depth test is disabled, the compare function field is still
+ * programmed, to GEN6_COMPAREFUNCTION_ALWAYS.  Depth writes are enabled
+ * whenever the writemask is set, independently of the test enable.
+ */
+static uint32_t
+dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
+                          const struct pipe_depth_state *state)
+{
+   int func;
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
+    *
+    *     "Enabling the Depth Test function without defining a Depth Buffer is
+    *      UNDEFINED."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
+    *
+    *     "A Depth Buffer must be defined before enabling writes to it, or
+    *      operation is UNDEFINED."
+    *
+    * TODO We do not check these yet.
+    */
+   func = (state->enabled) ?
+      gen6_translate_dsa_func(state->func) : GEN6_COMPAREFUNCTION_ALWAYS;
+
+   dw = func << 27;
+   if (state->enabled)
+      dw |= GEN6_ZS_DW2_DEPTH_TEST_ENABLE;
+   if (state->writemask)
+      dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
+
+   return dw;
+}
+
+/**
+ * Build the depth test bits for Gen8.
+ *
+ * When the depth test is disabled, the compare function field is still
+ * programmed, to GEN6_COMPAREFUNCTION_ALWAYS.  Depth writes are enabled
+ * whenever the writemask is set, independently of the test enable.
+ */
+static uint32_t
+dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
+                          const struct pipe_depth_state *state)
+{
+   int func;
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   func = (state->enabled) ?
+      gen6_translate_dsa_func(state->func) : GEN6_COMPAREFUNCTION_ALWAYS;
+
+   dw = func << 5;
+   if (state->enabled)
+      dw |= GEN8_ZS_DW1_DEPTH_TEST_ENABLE;
+   if (state->writemask)
+      dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
+
+   return dw;
+}
+
+/**
+ * Return the alpha test enable bit and the translated alpha test function
+ * for Gen6 to Gen7.5, or 0 when the alpha test is disabled.
+ *
+ * The result is meant to be ORed into BLEND_STATE.
+ */
+static uint32_t
+dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
+                          const struct pipe_alpha_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (state->enabled) {
+      dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
+           gen6_translate_dsa_func(state->func) << 13;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the alpha test enable bit and the translated alpha test function
+ * for Gen8, or 0 when the alpha test is disabled.
+ *
+ * The result is meant to be ORed into BLEND_STATE.
+ */
+static uint32_t
+dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
+                          const struct pipe_alpha_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->enabled) {
+      dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
+           gen6_translate_dsa_func(state->func) << 24;
+   }
+
+   return dw;
+}
+
+/**
+ * Initialize the driver's depth/stencil/alpha state from the pipe state.
+ *
+ * On Gen8 the stencil and depth bits share payload[0] and payload[2] is not
+ * written by this function.  On earlier generations the stencil bits go to
+ * payload[0] and the depth bits to payload[2].  payload[1] always holds the
+ * four stencil masks, and alpha_ref is the alpha reference value converted
+ * with float_to_ubyte().
+ */
+void
+ilo_gpe_init_dsa(const struct ilo_dev *dev,
+                 const struct pipe_depth_stencil_alpha_state *state,
+                 struct ilo_dsa_state *dsa)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(Elements(dsa->payload) >= 3);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
+            &state->stencil[0], &state->stencil[1]);
+      const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
+
+      /* the stencil and depth bits must not overlap */
+      assert(!(dw_stencil & dw_depth));
+      dsa->payload[0] = dw_stencil | dw_depth;
+
+      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
+      dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
+         GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
+   } else {
+      dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
+            &state->stencil[0], &state->stencil[1]);
+      dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
+
+      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
+      dsa->dw_ps_blend_alpha = 0;
+   }
+
+   /*
+    * Convert the masks to uint32_t before shifting.  NOTE(review): the mask
+    * fields are presumably narrower than int (8-bit fields in
+    * pipe_stencil_state -- confirm), in which case they are promoted to int
+    * and a mask with its top bit set would be shifted into the sign bit by
+    * "<< 24", which is undefined behavior.
+    */
+   dsa->payload[1] = (uint32_t) state->stencil[0].valuemask << 24 |
+                     (uint32_t) state->stencil[0].writemask << 16 |
+                     (uint32_t) state->stencil[1].valuemask << 8 |
+                     (uint32_t) state->stencil[1].writemask;
+
+   dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
+}
+
+/**
+ * Store num_states scissor rectangles, starting at slot start_slot, into the
+ * scissor payload.
+ *
+ * Each slot takes two dwords: (min_y << 16 | min_x) followed by
+ * (max_y << 16 | max_x).  A pipe rectangle that is empty in either dimension
+ * is stored with min greater than max.  When slot 0 is among the updated
+ * slots, the pipe state for it is also saved in scissor->scissor0.
+ *
+ * The caller must keep start_slot + num_states within the number of slots
+ * the payload can hold; this is asserted.
+ */
+void
+ilo_gpe_set_scissor(const struct ilo_dev *dev,
+                    unsigned start_slot,
+                    unsigned num_states,
+                    const struct pipe_scissor_state *states,
+                    struct ilo_scissor_state *scissor)
+{
+   unsigned i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* every slot uses two payload dwords */
+   assert((start_slot + num_states) * 2 <= Elements(scissor->payload));
+
+   for (i = 0; i < num_states; i++) {
+      uint16_t min_x, min_y, max_x, max_y;
+
+      /* both max and min are inclusive in SCISSOR_RECT */
+      if (states[i].minx < states[i].maxx &&
+          states[i].miny < states[i].maxy) {
+         min_x = states[i].minx;
+         min_y = states[i].miny;
+         max_x = states[i].maxx - 1;
+         max_y = states[i].maxy - 1;
+      }
+      else {
+         /* we have to make min greater than max */
+         min_x = 1;
+         min_y = 1;
+         max_x = 0;
+         max_y = 0;
+      }
+
+      /*
+       * The uint16_t values are promoted to int; convert them to uint32_t
+       * first so that a y coordinate of 0x8000 or above is not shifted into
+       * the sign bit of an int, which is undefined behavior.
+       */
+      scissor->payload[(start_slot + i) * 2 + 0] =
+         (uint32_t) min_y << 16 | min_x;
+      scissor->payload[(start_slot + i) * 2 + 1] =
+         (uint32_t) max_y << 16 | max_x;
+   }
+
+   if (!start_slot && num_states)
+      scissor->scissor0 = states[0];
+}
+
+/**
+ * Fill every scissor slot with a rectangle whose min (1, 1) is greater than
+ * its max (0, 0).  dev is not used.
+ */
+void
+ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
+                         struct ilo_scissor_state *scissor)
+{
+   const uint32_t min_dw = 1 << 16 | 1;
+   const uint32_t max_dw = 0;
+   unsigned slot_dw = 0;
+
+   /* two dwords per slot: min first, then max */
+   while (slot_dw < Elements(scissor->payload)) {
+      scissor->payload[slot_dw + 0] = min_dw;
+      scissor->payload[slot_dw + 1] = max_dw;
+      slot_dw += 2;
+   }
+}
+
+/**
+ * Work out which blend-related features can be used with a color buffer of
+ * the given format.
+ *
+ * All caps are cleared first and stay cleared for PIPE_FORMAT_NONE and for
+ * mixed formats.
+ */
+static void
+fb_set_blend_caps(const struct ilo_dev *dev,
+                  enum pipe_format format,
+                  struct ilo_fb_blend_caps *caps)
+{
+   const struct util_format_description *desc =
+      util_format_description(format);
+   const int ch = util_format_get_first_non_void_channel(format);
+   bool pure_integer;
+
+   memset(caps, 0, sizeof(*caps));
+
+   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
+      return;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+    *      variants), otherwise Logic Ops must be DISABLED."
+    *
+    * According to the classic driver, this is lifted on Gen8+.
+    */
+   caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8)) ||
+      (ch >= 0 && desc->channel[ch].normalized &&
+       desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
+       desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
+
+   pure_integer = util_format_is_pure_integer(format);
+
+   /* no blending for pure integer formats */
+   caps->can_blend = !pure_integer;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+    *
+    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+    *      alpha value."
+    */
+   caps->can_alpha_test = !pure_integer;
+
+   /* set when the format is rendered to as a different hardware format */
+   caps->dst_alpha_forced_one =
+      (ilo_format_translate_render(dev, format) !=
+       ilo_format_translate_color(dev, format));
+
+   /* sanity check: only B8G8R8X8_UNORM is expected to be remapped */
+   if (caps->dst_alpha_forced_one) {
+      const enum pipe_format render_format =
+         (format == PIPE_FORMAT_B8G8R8X8_UNORM) ?
+         PIPE_FORMAT_B8G8R8A8_UNORM : PIPE_FORMAT_NONE;
+
+      assert(ilo_format_translate_render(dev, format) ==
+             ilo_format_translate_color(dev, render_format));
+   }
+}
+
+/**
+ * Update the driver's framebuffer state from a pipe framebuffer state.
+ *
+ * This copies the pipe state, sets up a null render target of the
+ * framebuffer size (at least 1x1), computes the blend caps of every color
+ * buffer slot below nr_cbufs, and derives the sample count from the first
+ * bound color buffer, or from the depth/stencil buffer when no color buffer
+ * is bound.  The sample count is never less than 1.
+ */
+void
+ilo_gpe_set_fb(const struct ilo_dev *dev,
+               const struct pipe_framebuffer_state *state,
+               struct ilo_fb_state *fb)
+{
+   const struct pipe_surface *sample_surf = NULL;
+   int idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   util_copy_framebuffer_state(&fb->state, state);
+
+   ilo_gpe_init_view_surface_null(dev,
+         (state->width) ? state->width : 1,
+         (state->height) ? state->height : 1,
+         1, 0, &fb->null_rt);
+
+   for (idx = 0; idx < state->nr_cbufs; idx++) {
+      const struct pipe_surface *cbuf = state->cbufs[idx];
+
+      /* an unbound slot is treated as PIPE_FORMAT_NONE */
+      if (!cbuf) {
+         fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[idx]);
+         continue;
+      }
+
+      fb_set_blend_caps(dev, cbuf->format, &fb->blend_caps[idx]);
+
+      if (!sample_surf)
+         sample_surf = cbuf;
+   }
+
+   /* fall back to the depth/stencil buffer when there is no color buffer */
+   if (!sample_surf && state->zsbuf)
+      sample_surf = state->zsbuf;
+
+   if (sample_surf)
+      fb->num_samples = sample_surf->texture->nr_samples;
+   else
+      fb->num_samples = 1;
+
+   if (!fb->num_samples)
+      fb->num_samples = 1;
+
+   /*
+    * The PRMs list several restrictions when the framebuffer has more than
+    * one surface.  It seems they are actually lifted on GEN6+.
+    */
+}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_top.c b/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
new file mode 100644 (file)
index 0000000..004656f
--- /dev/null
@@ -0,0 +1,1711 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "util/u_dual_blend.h"
+#include "util/u_framebuffer.h"
+#include "util/u_half.h"
+#include "util/u_resource.h"
+
+#include "ilo_format.h"
+#include "ilo_state_3d.h"
+#include "../ilo_resource.h"
+#include "../ilo_shader.h"
+
+/**
+ * Initialize the two payload dwords of a vertex element.
+ *
+ * Components present in the source format are stored from the source.  For
+ * formats with one to three components, the missing middle components are
+ * stored as 0 and the last component as 1 (integer or float 1 depending on
+ * whether the format is a pure integer format).
+ */
+static void
+ve_init_cso(const struct ilo_dev *dev,
+            const struct pipe_vertex_element *state,
+            unsigned vb_index,
+            struct ilo_ve_cso *cso)
+{
+   int comp[4] = {
+      GEN6_VFCOMP_STORE_SRC,
+      GEN6_VFCOMP_STORE_SRC,
+      GEN6_VFCOMP_STORE_SRC,
+      GEN6_VFCOMP_STORE_SRC,
+   };
+   unsigned nr_comps;
+   int format;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   nr_comps = util_format_get_nr_components(state->src_format);
+
+   /* any other component count leaves all four components as STORE_SRC */
+   if (nr_comps >= 1 && nr_comps <= 3) {
+      if (nr_comps == 1)
+         comp[1] = GEN6_VFCOMP_STORE_0;
+      if (nr_comps <= 2)
+         comp[2] = GEN6_VFCOMP_STORE_0;
+
+      comp[3] = (util_format_is_pure_integer(state->src_format)) ?
+                GEN6_VFCOMP_STORE_1_INT :
+                GEN6_VFCOMP_STORE_1_FP;
+   }
+
+   format = ilo_format_translate_vertex(dev, state->src_format);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   cso->payload[0] =
+      vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
+      GEN6_VE_DW0_VALID |
+      format << GEN6_VE_DW0_FORMAT__SHIFT |
+      state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
+
+   cso->payload[1] =
+      comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
+      comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
+      comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
+      comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
+}
+
+/**
+ * Initialize the vertex element state from an array of pipe vertex elements.
+ *
+ * Each distinct (pipe vertex buffer index, instance divisor) pair is given
+ * its own hardware vertex buffer slot, recorded in ve->vb_mapping and
+ * ve->instance_divisors; elements sharing a pair share the slot.
+ */
+void
+ilo_gpe_init_ve(const struct ilo_dev *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve)
+{
+   unsigned elem;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   ve->count = num_states;
+   ve->vb_count = 0;
+
+   for (elem = 0; elem < num_states; elem++) {
+      const unsigned pipe_vb = states[elem].vertex_buffer_index;
+      const unsigned divisor = states[elem].instance_divisor;
+      unsigned hw_vb = 0;
+
+      /*
+       * map the pipe vb to the hardware vb, which has a fixed instance
+       * divisor
+       */
+      while (hw_vb < ve->vb_count &&
+             !(ve->vb_mapping[hw_vb] == pipe_vb &&
+               ve->instance_divisors[hw_vb] == divisor))
+         hw_vb++;
+
+      /* no match: append a new hardware vb at the end */
+      if (hw_vb == ve->vb_count) {
+         ve->vb_mapping[hw_vb] = pipe_vb;
+         ve->instance_divisors[hw_vb] = divisor;
+         ve->vb_count++;
+      }
+
+      ve_init_cso(dev, &states[elem], hw_vb, &ve->cso[elem]);
+   }
+}
+
+/**
+ * Turn an already-initialized vertex element into an edge flag element.
+ *
+ * This sets the edge flag enable bit, replaces the R32_FLOAT and R8_USCALED
+ * source formats by their _UINT counterparts (other formats are kept), and
+ * overwrites the component controls so that only component 0 is stored.
+ */
+void
+ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
+                        struct ilo_ve_cso *cso)
+{
+   int hw_format;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
+    *        valid VERTEX_ELEMENT structure.
+    *
+    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+    *
+    *      - The Source Element Format must be set to the UINT format.
+    *
+    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
+    *        primitives.  Software may elect to convert QUADLIST primitives
+    *        to some set of corresponding edge-flag-supported primitive
+    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+    */
+   cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
+
+   /*
+    * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
+    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
+    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+    *
+    * Since all the hardware cares about is whether the flags are zero or not,
+    * we can treat them as the corresponding _UINT formats.
+    */
+   hw_format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
+   cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
+
+   if (hw_format == GEN6_FORMAT_R32_FLOAT)
+      hw_format = GEN6_FORMAT_R32_UINT;
+   else if (hw_format == GEN6_FORMAT_R8_USCALED)
+      hw_format = GEN6_FORMAT_R8_UINT;
+
+   cso->payload[0] |= GEN_SHIFT32(hw_format, GEN6_VE_DW0_FORMAT);
+
+   /* store component 0 only */
+   cso->payload[1] =
+      GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
+}
+
+/**
+ * Initialize a vertex element that does not read from a vertex buffer.
+ *
+ * Only the valid bit is set in the first dword, and the four component
+ * controls are taken from the arguments.  None of them may be
+ * GEN6_VFCOMP_STORE_SRC; this is asserted.
+ */
+void
+ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
+                      int comp0, int comp1, int comp2, int comp3,
+                      struct ilo_ve_cso *cso)
+{
+   uint32_t comp_dw;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
+          comp1 != GEN6_VFCOMP_STORE_SRC &&
+          comp2 != GEN6_VFCOMP_STORE_SRC &&
+          comp3 != GEN6_VFCOMP_STORE_SRC);
+
+   comp_dw = comp0 << GEN6_VE_DW1_COMP0__SHIFT |
+             comp1 << GEN6_VE_DW1_COMP1__SHIFT |
+             comp2 << GEN6_VE_DW1_COMP2__SHIFT |
+             comp3 << GEN6_VE_DW1_COMP3__SHIFT;
+
+   cso->payload[0] = GEN6_VE_DW0_VALID;
+   cso->payload[1] = comp_dw;
+}
+
+/**
+ * Initialize the vertex shader cso payload from the shader's kernel
+ * parameters.
+ *
+ * The three payload dwords are filled with the values computed as dw2
+ * (sampler count), dw4 (URB read setup) and dw5 (enable, statistics and
+ * maximum thread count), in that order.
+ */
+void
+ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *vs,
+                    struct ilo_shader_cso *cso)
+{
+   int grf_start, read_len, nr_samplers, nr_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   grf_start = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
+   read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
+   nr_samplers = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
+    *
+    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
+    *      128-bit vertex elements to be passed into the payload for each
+    *      vertex."
+    *
+    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
+    *      data to be read and passed to the thread."
+    */
+   read_len = (read_len + 1) / 2;
+   if (read_len == 0)
+      read_len = 1;
+
+   /* the thread count is doubled on Gen7.5 GT2 */
+   nr_threads = dev->thread_count;
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
+      nr_threads *= 2;
+
+   /* GEN6_THREADDISP_FP_MODE_ALT is never set here */
+   dw2 = ((nr_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
+         read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
+         0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
+
+   dw5 = GEN6_VS_DW5_STATISTICS |
+         GEN6_VS_DW5_VS_ENABLE;
+
+   /* the max threads field moved on Gen7.5 */
+   if (ilo_dev_gen(dev) < ILO_GEN(7.5))
+      dw5 |= (nr_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+   else
+      dw5 |= (nr_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 3);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+}
+
+static void
+gs_init_cso_gen6(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *gs,
+                 struct ilo_shader_cso *cso)
+{  /*
+    * Builds four dwords (dw2, dw4, dw5 and dw6, named after the GEN6_GS_DW*
+    * fields they are assembled from) out of the kernel parameters of gs and
+    * stores them, in that order, in cso->payload[0..3].  GEN6 only.
+    */
+   int start_grf, vue_read_len, max_threads;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
+      start_grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_URB_DATA_START_REG);
+
+      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+   }
+   else {  /* not a geometry shader: judging by the VS_GEN6_SO parameter
+            * names this is presumably a VS doing stream output (TODO
+            * confirm); its output count sizes the URB read instead */
+      start_grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_VS_GEN6_SO_START_REG);
+
+      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
+    *
+    *     "Specifies the amount of URB data read and passed in the thread
+    *      payload for each Vertex URB entry, in 256-bit register increments.
+    *
+    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
+    *      0 indicating no Vertex URB data to be read and passed to the
+    *      thread."
+    *
+    * Hence the count is halved (rounding up) and then clamped to at least 1.
+    */
+   vue_read_len = (vue_read_len + 1) / 2;
+   if (!vue_read_len)
+      vue_read_len = 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+    *
+    *     "Maximum Number of Threads valid range is [0,27] when Rendering
+    *      Enabled bit is set."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+    *
+    *     "Programming Note: If the GS stage is enabled, software must always
+    *      allocate at least one GS URB Entry. This is true even if the GS
+    *      thread never needs to output vertices to the pipeline, e.g., when
+    *      only performing stream output. This is an artifact of the need to
+    *      pass the GS thread an initial destination URB handle."
+    *
+    * As such, we always enable rendering, and limit the number of threads.
+    */
+   if (dev->gt == 2) {
+      /* maximum is 60, but limited to 28 */
+      max_threads = 28;
+   }
+   else {
+      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
+      max_threads = 21;
+   }
+
+   dw2 = GEN6_THREADDISP_SPF;
+
+   dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
+         0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
+         start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | /* count - 1 */
+         GEN6_GS_DW5_STATISTICS |
+         GEN6_GS_DW5_SO_STATISTICS |
+         GEN6_GS_DW5_RENDER_ENABLE;
+
+   /*
+    * we cannot make use of GEN6_GS_REORDER because it will reorder
+    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
+    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
+    * (2N+2, 2N+1, 2N+3)).
+    */
+   dw6 = GEN6_GS_DW6_GS_ENABLE;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
+      dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
+      const uint32_t svbi_post_inc =
+         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
+
+      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
+      if (svbi_post_inc) {  /* post-increment is enabled only when non-zero */
+         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
+                svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
+      }
+   }
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = dw6;
+}
+
+static void
+gs_init_cso_gen7(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *gs,
+                 struct ilo_shader_cso *cso)
+{  /*
+    * Builds three dwords (dw2, dw4 and dw5, named after the GEN7_GS_DW*
+    * fields they are assembled from) out of the kernel parameters of gs and
+    * stores them, in that order, in cso->payload[0..2].  GEN7 and GEN7.5.
+    */
+   int start_grf, vue_read_len, sampler_count, max_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
+   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /* in pairs, rounded up; unlike the GEN6 path a result of 0 is kept */
+   vue_read_len = (vue_read_len + 1) / 2;
+
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(7.5):
+      max_threads = (dev->gt >= 2) ? 256 : 70;
+      break;
+   case ILO_GEN(7):
+      max_threads = (dev->gt == 2) ? 128 : 36;
+      break;
+   default:  /* not expected given the ILO_DEV_ASSERT() range above */
+      max_threads = 1;
+      break;
+   }
+
+   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; /* always evaluates to 0 */
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
+         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
+         0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
+         start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | /* count - 1 */
+         GEN7_GS_DW5_STATISTICS |
+         GEN7_GS_DW5_GS_ENABLE;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 3);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+}
+
+void
+ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *gs,
+                    struct ilo_shader_cso *cso)
+{  /* Fills cso->payload for gs using the packer that matches the device. */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      gs_init_cso_gen7(dev, gs, cso);  /* writes payload[0..2] */
+   else
+      gs_init_cso_gen6(dev, gs, cso);  /* writes payload[0..3] */
+}
+
+static void
+view_init_null_gen6(const struct ilo_dev *dev,
+                    unsigned width, unsigned height,
+                    unsigned depth, unsigned level,
+                    struct ilo_view_surface *surf)
+{  /*
+    * Writes a six-dword SURFTYPE_NULL surface state into surf->payload.
+    * width, height and depth must each be at least 1 and are stored minus
+    * one; level is stored as is.  GEN6 only.
+    */
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   assert(width >= 1 && height >= 1 && depth >= 1);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
+    *
+    *     "A null surface will be used in instances where an actual surface is
+    *      not bound. When a write message is generated to a null surface, no
+    *      actual surface is written to. When a read message (including any
+    *      sampling engine message) is generated to a null surface, the result
+    *      is all zeros. Note that a null surface type is allowed to be used
+    *      with all messages, even if it is not specificially indicated as
+    *      supported. All of the remaining fields in surface state are ignored
+    *      for null surfaces, with the following exceptions:
+    *
+    *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
+    *          depth buffer's corresponding state for all render target
+    *          surfaces, including null.
+    *        * Surface Format must be R8G8B8A8_UNORM."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
+    *      true"
+    *
+    * NOTE(review): the first quote names R8G8B8A8_UNORM, whereas the code
+    * below programs GEN6_FORMAT_B8G8R8A8_UNORM -- confirm this is intended.
+    */
+
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
+   dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
+           GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+
+   dw[1] = 0;
+
+   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+           (width  - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+           level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+           GEN6_TILING_X;  /* tiled, per the second PRM quote above */
+
+   dw[4] = 0;
+   dw[5] = 0;
+}
+
+static void
+view_init_for_buffer_gen6(const struct ilo_dev *dev,
+                          const struct ilo_buffer *buf,
+                          unsigned offset, unsigned size,
+                          unsigned struct_size,
+                          enum pipe_format elem_format,
+                          bool is_rt, bool render_cache_rw,
+                          struct ilo_view_surface *surf)
+{  /*
+    * Writes a six-dword SURFTYPE_BUFFER surface state into surf->payload.
+    * The entry count is derived from size and struct_size, and offset is
+    * stored verbatim in dw[1].  buf is not read in this function.  GEN6
+    * only.
+    */
+   const int elem_size = util_format_get_blocksize(elem_format);
+   int width, height, depth, pitch;
+   int surface_format, num_entries;
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /*
+    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
+    * structure in a buffer.
+    *
+    * NOTE(review): the value returned by ilo_format_translate_color() is
+    * used unchecked below, whereas the texture paths assert that their
+    * translated format is >= 0.
+    */
+
+   surface_format = ilo_format_translate_color(dev, elem_format);
+
+   num_entries = size / struct_size;  /* NOTE: assumes struct_size != 0 */
+   /* see if there is enough space to fit another element */
+   if (size % struct_size >= elem_size)
+      num_entries++;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
+    *
+    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
+    *      Address) specifies the base address of first element of the
+    *      surface. The surface is interpreted as a simple array of that
+    *      single element type. The address must be naturally-aligned to the
+    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
+    *      must be 16-byte aligned).
+    *
+    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
+    *      the base address of the first element of the surface, computed in
+    *      software by adding the surface base address to the byte offset of
+    *      the element in the buffer."
+    */
+   if (is_rt)
+      assert(offset % elem_size == 0);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
+    *
+    *     "For buffer surfaces, the number of entries in the buffer ranges
+    *      from 1 to 2^27."
+    */
+   assert(num_entries >= 1 && num_entries <= 1 << 27);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
+    *      indicates the size of the structure."
+    */
+   pitch = struct_size;
+
+   pitch--;        /* both values are programmed minus one ... */
+   num_entries--;  /* ... and the entry count is split over three fields */
+   /* bits [6:0] */
+   width  = (num_entries & 0x0000007f);
+   /* bits [19:7] */
+   height = (num_entries & 0x000fff80) >> 7;
+   /* bits [26:20] */
+   depth  = (num_entries & 0x07f00000) >> 20;
+
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
+   dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
+           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   if (render_cache_rw)
+      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+
+   dw[1] = offset;
+
+   dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+           width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
+
+   dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+           pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
+
+   dw[4] = 0;
+   dw[5] = 0;
+}
+
+static void
+view_init_for_texture_gen6(const struct ilo_dev *dev,
+                           const struct ilo_texture *tex,
+                           enum pipe_format format,
+                           unsigned first_level,
+                           unsigned num_levels,
+                           unsigned first_layer,
+                           unsigned num_layers,
+                           bool is_rt,
+                           struct ilo_view_surface *surf)
+{  /*
+    * Writes a six-dword texture surface state into surf->payload for the
+    * given level and layer range of tex.  is_rt selects the render-target
+    * variant: render format translation, a single level, cubes viewed as 2D
+    * arrays, and the render cache bit.  GEN6 only.
+    */
+   int surface_type, surface_format;
+   int width, height, depth, pitch, lod;
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+   assert(surface_type != GEN6_SURFTYPE_BUFFER);
+
+   /* with a separate stencil buffer only the depth part is viewed */
+   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
+      format = PIPE_FORMAT_Z32_FLOAT;
+
+   if (is_rt)
+      surface_format = ilo_format_translate_render(dev, format);
+   else
+      surface_format = ilo_format_translate_texture(dev, format);
+   assert(surface_format >= 0);
+
+   width = tex->image.width0;
+   height = tex->image.height0;
+   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+      tex->base.depth0 : num_layers;
+   pitch = tex->image.bo_stride;
+
+   if (surface_type == GEN6_SURFTYPE_CUBE) {
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+       *
+       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
+       *      range of this field (Depth) is [0,84], indicating the number of
+       *      cube array elements (equal to the number of underlying 2D array
+       *      elements divided by 6). For other surfaces, this field must be
+       *      zero."
+       *
+       * When is_rt is true, we treat the texture as a 2D one to avoid the
+       * restriction.
+       */
+      if (is_rt) {
+         surface_type = GEN6_SURFTYPE_2D;
+      }
+      else {
+         assert(num_layers % 6 == 0);
+         depth = num_layers / 6;
+      }
+   }
+
+   /* sanity check the size */
+   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
+   switch (surface_type) {
+   case GEN6_SURFTYPE_1D:
+      assert(width <= 8192 && height == 1 && depth <= 512);
+      assert(first_layer < 512 && num_layers <= 512);
+      break;
+   case GEN6_SURFTYPE_2D:
+      assert(width <= 8192 && height <= 8192 && depth <= 512);
+      assert(first_layer < 512 && num_layers <= 512);
+      break;
+   case GEN6_SURFTYPE_3D:
+      assert(width <= 2048 && height <= 2048 && depth <= 2048);
+      assert(first_layer < 2048 && num_layers <= 512);
+      if (!is_rt)
+         assert(first_layer == 0);
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      assert(width <= 8192 && height <= 8192 && depth <= 85);
+      assert(width == height);
+      assert(first_layer < 512 && num_layers <= 512);
+      if (is_rt)  /* cannot fire: is_rt cubes were retyped to 2D above */
+         assert(first_layer == 0);
+      break;
+   default:
+      assert(!"unexpected surface type");
+      break;
+   }
+
+   /* non-full array spacing is supported only on GEN7+ */
+   assert(tex->image.walk != ILO_IMAGE_WALK_LOD);
+   /* non-interleaved samples are supported only on GEN7+ */
+   if (tex->base.nr_samples > 1)
+      assert(tex->image.interleaved_samples);
+
+   /*
+    * lod ends up in the MIP_COUNT_LOD field: the level itself for a render
+    * target, the number of levels minus one otherwise
+    */
+   if (is_rt) {
+      assert(num_levels == 1);
+      lod = first_level;
+   }
+   else {
+      lod = num_levels - 1;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
+    *
+    *     "Linear render target surface base addresses must be element-size
+    *      aligned, for non-YUV surface formats, or a multiple of 2
+    *      element-sizes for YUV surface formats. Other linear surfaces have
+    *      no alignment requirements (byte alignment is sufficient.)"
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "For linear render target surfaces, the pitch must be a multiple
+    *      of the element size for non-YUV surface formats. Pitch must be a
+    *      multiple of 2 * element size for YUV surface formats."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
+    *
+    *     "For linear surfaces, this field (X Offset) must be zero"
+    *
+    * Only the pitch rule is checked below.
+    */
+   if (tex->image.tiling == GEN6_TILING_NONE) {
+      if (is_rt) {
+         const int elem_size = util_format_get_blocksize(format);
+         assert(pitch % elem_size == 0);
+      }
+   }
+
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   dw = surf->payload;
+
+   dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
+           GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
+
+   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
+      dw[0] |= 1 << 9 |  /* raw bit 9, no named macro used -- TODO document */
+               GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+   }
+
+   if (is_rt)
+      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+
+   dw[1] = 0;
+
+   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+           (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+           lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+   assert(tex->image.tiling != GEN8_TILING_W);
+   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+           (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
+           tex->image.tiling;  /* tiling value is OR'ed in unshifted */
+
+   dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
+           first_layer << 17 |       /* raw shift, no named macro */
+           (num_layers - 1) << 8 |   /* raw shift, no named macro */
+           ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
+                                         GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
+
+   dw[5] = 0;
+
+   assert(tex->image.align_j == 2 || tex->image.align_j == 4);
+   if (tex->image.align_j == 4)
+      dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
+}
+
+static void
+view_init_null_gen7(const struct ilo_dev *dev,
+                    unsigned width, unsigned height,
+                    unsigned depth, unsigned level,
+                    struct ilo_view_surface *surf)
+{  /*
+    * Writes a SURFTYPE_NULL surface state into surf->payload: dw[0..7] on
+    * every supported device, plus zeroed dw[8..12] on GEN8.  width, height
+    * and depth must each be at least 1 and are stored minus one.
+    */
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   assert(width >= 1 && height >= 1 && depth >= 1);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 62:
+    *
+    *     "A null surface is used in instances where an actual surface is not
+    *      bound. When a write message is generated to a null surface, no
+    *      actual surface is written to. When a read message (including any
+    *      sampling engine message) is generated to a null surface, the result
+    *      is all zeros.  Note that a null surface type is allowed to be used
+    *      with all messages, even if it is not specificially indicated as
+    *      supported. All of the remaining fields in surface state are ignored
+    *      for null surfaces, with the following exceptions:
+    *
+    *      * Width, Height, Depth, LOD, and Render Target View Extent fields
+    *        must match the depth buffer's corresponding state for all render
+    *        target surfaces, including null.
+    *      * All sampling engine and data port messages support null surfaces
+    *        with the above behavior, even if not mentioned as specifically
+    *        supported, except for the following:
+    *        * Data Port Media Block Read/Write messages.
+    *      * The Surface Type of a surface used as a render target (accessed
+    *        via the Data Port's Render Target Write message) must be the same
+    *        as the Surface Type of all other render targets and of the depth
+    *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
+    *        buffer or render targets are SURFTYPE_NULL."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 65:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
+    *      true"
+    */
+
+   STATIC_ASSERT(Elements(surf->payload) >= 13);
+   dw = surf->payload;
+
+   dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
+           GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+
+   /* the tiling field sits at a different shift on GEN8 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
+   else
+      dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+   dw[1] = 0;
+
+   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
+           GEN_SHIFT32(width  - 1, GEN7_SURFACE_DW2_WIDTH);
+
+   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
+
+   dw[4] = 0;
+   dw[5] = level;  /* stored unshifted */
+
+   dw[6] = 0;
+   dw[7] = 0;
+
+   /* GEN8 payloads are 13 dwords long; clear the remaining five */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
+}
+
+/*
+ * Write a GEN7/GEN7.5/GEN8 buffer surface state into surf->payload.
+ *
+ * The buffer is viewed in one of three ways, decided from the arguments:
+ *
+ *  - typed:      elem_format is not PIPE_FORMAT_NONE; SURFTYPE_BUFFER with
+ *                the translated color format
+ *  - structured: untyped with struct_size > 1; SURFTYPE_STRBUF with
+ *                GEN6_FORMAT_RAW
+ *  - raw:        untyped with struct_size <= 1; SURFTYPE_BUFFER with
+ *                GEN6_FORMAT_RAW
+ *
+ * dw[0..7] are written on every supported device.  On GEN8 the offset is
+ * stored in dw[8] and dw[9..12] are cleared; on older devices the offset is
+ * stored in dw[1].  buf is not read in this function.
+ *
+ * struct_size must be non-zero (it is used as a divisor).
+ */
+static void
+view_init_for_buffer_gen7(const struct ilo_dev *dev,
+                          const struct ilo_buffer *buf,
+                          unsigned offset, unsigned size,
+                          unsigned struct_size,
+                          enum pipe_format elem_format,
+                          bool is_rt, bool render_cache_rw,
+                          struct ilo_view_surface *surf)
+{
+   const bool typed = (elem_format != PIPE_FORMAT_NONE);
+   const bool structured = (!typed && struct_size > 1);
+   const int elem_size = (typed) ?
+      util_format_get_blocksize(elem_format) : 1;
+   int width, height, depth, pitch;
+   int surface_type, surface_format, num_entries;
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
+
+   surface_format = (typed) ?
+      ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
+
+   num_entries = size / struct_size;
+   /* see if there is enough space to fit another element */
+   if (size % struct_size >= elem_size && !structured)
+      num_entries++;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
+    *
+    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
+    *      Address) specifies the base address of first element of the
+    *      surface. The surface is interpreted as a simple array of that
+    *      single element type. The address must be naturally-aligned to the
+    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
+    *      must be 16-byte aligned)
+    *
+    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
+    *      the base address of the first element of the surface, computed in
+    *      software by adding the surface base address to the byte offset of
+    *      the element in the buffer."
+    */
+   if (is_rt)
+      assert(offset % elem_size == 0);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "For typed buffer and structured buffer surfaces, the number of
+    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
+    *      surfaces, the number of entries in the buffer is the number of
+    *      bytes which can range from 1 to 2^30."
+    */
+   assert(num_entries >= 1 &&
+          num_entries <= 1 << ((typed || structured) ? 27 : 30));
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
+    *      11 if the Surface Format is RAW (the size of the buffer must be a
+    *      multiple of 4 bytes)."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
+    *      field (Surface Pitch) indicates the size of the structure."
+    *
+    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
+    *      must be a multiple of 4 bytes."
+    *
+    * The raw check is made before num_entries is decremented, so the low
+    * two bits of the stored width come out as 11.
+    */
+   if (structured)
+      assert(struct_size % 4 == 0);
+   else if (!typed)
+      assert(num_entries % 4 == 0);
+
+   pitch = struct_size;
+
+   /* both values are programmed minus one */
+   pitch--;
+   num_entries--;
+   /* bits [6:0] */
+   width  = (num_entries & 0x0000007f);
+   /* bits [20:7] */
+   height = (num_entries & 0x001fff80) >> 7;
+   /* bits [30:21] */
+   depth  = (num_entries & 0x7fe00000) >> 21;
+   /* limit to [26:21] */
+   if (typed || structured)
+      depth &= 0x3f;
+
+   STATIC_ASSERT(Elements(surf->payload) >= 13);
+   dw = surf->payload;
+
+   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   if (render_cache_rw)
+      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      /*
+       * The offset lives in dw[8] on GEN8.  dw[1] carries no buffer state
+       * on this path, but it is still part of the payload, so clear it
+       * rather than leaving stale or uninitialized bits behind.
+       */
+      dw[1] = 0;
+      dw[8] = offset;
+      memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
+   } else {
+      dw[1] = offset;
+   }
+
+   dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
+           GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
+
+   dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
+           pitch;
+
+   dw[4] = 0;
+   dw[5] = 0;
+
+   dw[6] = 0;
+   dw[7] = 0;
+
+   /* GEN7.5+ gets an identity channel select (R, G, B, A) in dw[7] */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
+               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
+               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+}
+
+/*
+ * Write a GEN7/GEN7.5/GEN8 texture surface state into surf->payload for the
+ * given level and layer range of tex.
+ *
+ * is_rt selects the render-target variant: the format is translated with
+ * ilo_format_translate_render(), exactly one level may be viewed, cube
+ * textures are viewed as 2D arrays, and the render cache bit is set.
+ * Otherwise the surface is set up for sampling.
+ *
+ * dw[0..7] are written on every supported device; dw[8..12] are cleared on
+ * GEN8.
+ */
+static void
+view_init_for_texture_gen7(const struct ilo_dev *dev,
+                           const struct ilo_texture *tex,
+                           enum pipe_format format,
+                           unsigned first_level,
+                           unsigned num_levels,
+                           unsigned first_layer,
+                           unsigned num_layers,
+                           bool is_rt,
+                           struct ilo_view_surface *surf)
+{
+   int surface_type, surface_format;
+   int width, height, depth, pitch, lod;
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+   assert(surface_type != GEN6_SURFTYPE_BUFFER);
+
+   /* with a separate stencil buffer only the depth part is viewed */
+   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
+      format = PIPE_FORMAT_Z32_FLOAT;
+
+   if (is_rt)
+      surface_format = ilo_format_translate_render(dev, format);
+   else
+      surface_format = ilo_format_translate_texture(dev, format);
+   assert(surface_format >= 0);
+
+   width = tex->image.width0;
+   height = tex->image.height0;
+   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+      tex->base.depth0 : num_layers;
+   pitch = tex->image.bo_stride;
+
+   if (surface_type == GEN6_SURFTYPE_CUBE) {
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+       *
+       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
+       *      this field is [0,340], indicating the number of cube array
+       *      elements (equal to the number of underlying 2D array elements
+       *      divided by 6). For other surfaces, this field must be zero."
+       *
+       * When is_rt is true, we treat the texture as a 2D one to avoid the
+       * restriction.
+       */
+      if (is_rt) {
+         surface_type = GEN6_SURFTYPE_2D;
+      }
+      else {
+         assert(num_layers % 6 == 0);
+         depth = num_layers / 6;
+      }
+   }
+
+   /* sanity check the size */
+   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
+   assert(first_layer < 2048 && num_layers <= 2048);
+   switch (surface_type) {
+   case GEN6_SURFTYPE_1D:
+      assert(width <= 16384 && height == 1 && depth <= 2048);
+      break;
+   case GEN6_SURFTYPE_2D:
+      assert(width <= 16384 && height <= 16384 && depth <= 2048);
+      break;
+   case GEN6_SURFTYPE_3D:
+      assert(width <= 2048 && height <= 2048 && depth <= 2048);
+      if (!is_rt)
+         assert(first_layer == 0);
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      /*
+       * The Depth field is programmed as depth - 1 and, per the PRM text
+       * quoted above, ranges over [0,340] for cubes, so up to 341 cube
+       * array elements are valid.
+       */
+      assert(width <= 16384 && height <= 16384 && depth <= 341);
+      assert(width == height);
+      /* cannot fire: is_rt cubes were retyped to 2D above */
+      if (is_rt)
+         assert(first_layer == 0);
+      break;
+   default:
+      assert(!"unexpected surface type");
+      break;
+   }
+
+   /*
+    * lod is OR'ed into dw[5]: the level itself for a render target, the
+    * number of levels minus one otherwise
+    */
+   if (is_rt) {
+      assert(num_levels == 1);
+      lod = first_level;
+   }
+   else {
+      lod = num_levels - 1;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "The Base Address for linear render target surfaces and surfaces
+    *      accessed with the typed surface read/write data port messages must
+    *      be element-size aligned, for non-YUV surface formats, or a multiple
+    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
+    *      have no alignment requirements (byte alignment is sufficient)."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+    *
+    *     "For linear render target surfaces and surfaces accessed with the
+    *      typed data port messages, the pitch must be a multiple of the
+    *      element size for non-YUV surface formats. Pitch must be a multiple
+    *      of 2 * element size for YUV surface formats. For linear surfaces
+    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
+    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
+    *      of bytes."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
+    *
+    *     "For linear surfaces, this field (X Offset) must be zero."
+    *
+    * Only the pitch rule is checked below.
+    */
+   if (tex->image.tiling == GEN6_TILING_NONE) {
+      if (is_rt) {
+         const int elem_size = util_format_get_blocksize(format);
+         assert(pitch % elem_size == 0);
+      }
+   }
+
+   STATIC_ASSERT(Elements(surf->payload) >= 13);
+   dw = surf->payload;
+
+   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+    *
+    *     "If this field (Surface Array) is enabled, the Surface Type must be
+    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
+    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
+    *      SURFTYPE_CUBE, the Depth field must be set to zero."
+    *
+    * For non-3D sampler surfaces, resinfo (the sampler message) always
+    * returns zero for the number of layers when this field is not set.
+    */
+   if (surface_type != GEN6_SURFTYPE_3D) {
+      if (util_resource_is_array_texture(&tex->base))
+         dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
+      else
+         assert(depth == 1);
+   }
+
+   /* alignment, tiling and array spacing are encoded differently on GEN8 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      switch (tex->image.align_j) {
+      case 4:
+         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
+         break;
+      case 8:
+         dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
+         break;
+      case 16:
+         dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
+         break;
+      default:
+         assert(!"unsupported valign");
+         break;
+      }
+
+      switch (tex->image.align_i) {
+      case 4:
+         dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
+         break;
+      case 8:
+         dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
+         break;
+      case 16:
+         dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
+         break;
+      default:
+         assert(!"unsupported halign");
+         break;
+      }
+
+      dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
+   } else {
+      assert(tex->image.align_i == 4 || tex->image.align_i == 8);
+      assert(tex->image.align_j == 2 || tex->image.align_j == 4);
+
+      if (tex->image.align_j == 4)
+         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
+
+      if (tex->image.align_i == 8)
+         dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
+
+      assert(tex->image.tiling != GEN8_TILING_W);
+      dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+      if (tex->image.walk == ILO_IMAGE_WALK_LOD)
+         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
+      else
+         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
+   }
+
+   if (is_rt)
+      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+
+   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
+      dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+
+   /* on GEN8 dw[1] holds the layer height in units of four rows */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      assert(tex->image.layer_height % 4 == 0);
+      dw[1] = tex->image.layer_height / 4;
+   } else {
+      dw[1] = 0;
+   }
+
+   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
+           GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
+
+   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
+           (pitch - 1);
+
+   /* raw shifts: no named macros are used for these two fields */
+   dw[4] = first_layer << 18 |
+           (num_layers - 1) << 7;
+
+   /*
+    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
+    * means the samples are interleaved.  The layouts are the same when the
+    * number of samples is 1.
+    */
+   if (tex->image.interleaved_samples && tex->base.nr_samples > 1) {
+      assert(!is_rt);
+      dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
+   }
+   else {
+      dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
+   }
+
+   /* any sample count not listed explicitly is programmed as 1 */
+   switch (tex->base.nr_samples) {
+   case 0:
+   case 1:
+   default:
+      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
+      break;
+   case 2:
+      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
+      break;
+   case 4:
+      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
+      break;
+   case 8:
+      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
+      break;
+   case 16:
+      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
+      break;
+   }
+
+   dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
+           lod;
+
+   dw[6] = 0;
+   dw[7] = 0;
+
+   /* GEN7.5+ gets an identity channel select (R, G, B, A) in dw[7] */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
+               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
+               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+
+   /* GEN8 payloads are 13 dwords long; clear the remaining five */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
+}
+
+void
+ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
+                               unsigned width, unsigned height,
+                               unsigned depth, unsigned level,
+                               struct ilo_view_surface *surf)
+{  /*
+    * Initializes surf as a null surface of the given dimensions: the
+    * payload comes from the packer matching the device, and the surface
+    * gets no bo and is not a scanout.
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      view_init_null_gen7(dev,
+            width, height, depth, level, surf);
+   } else {
+      view_init_null_gen6(dev,
+            width, height, depth, level, surf);
+   }
+
+   surf->bo = NULL;
+   surf->scanout = false;
+}
+
+void
+ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
+                                     const struct ilo_buffer *buf,
+                                     unsigned offset, unsigned size,
+                                     unsigned struct_size,
+                                     enum pipe_format elem_format,
+                                     bool is_rt, bool render_cache_rw,
+                                     struct ilo_view_surface *surf)
+{  /*
+    * Initializes surf as a view of the byte range [offset, offset + size)
+    * of buf: the payload comes from the packer matching the device, the bo
+    * is borrowed from buf, and the surface is never a scanout.
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      view_init_for_buffer_gen7(dev, buf, offset, size,
+            struct_size, elem_format, is_rt, render_cache_rw, surf);
+   } else {
+      view_init_for_buffer_gen6(dev, buf, offset, size,
+            struct_size, elem_format, is_rt, render_cache_rw, surf);
+   }
+
+   /* do not increment reference count; the pointer is only copied */
+   surf->bo = buf->bo;
+   surf->scanout = false;
+}
+
+/**
+ * Initialize \p surf as a view surface of the texture \p tex.
+ *
+ * The generation-specific setup is delegated to
+ * view_init_for_texture_gen7() on Gen7+ and to
+ * view_init_for_texture_gen6() otherwise; all arguments are forwarded
+ * unchanged.  The surface borrows the bo of the texture image, and is
+ * flagged as a scanout when the texture is bound for scanout or is an
+ * imported render target.
+ */
+void
+ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
+                                      const struct ilo_texture *tex,
+                                      enum pipe_format format,
+                                      unsigned first_level,
+                                      unsigned num_levels,
+                                      unsigned first_layer,
+                                      unsigned num_layers,
+                                      bool is_rt,
+                                      struct ilo_view_surface *surf)
+{
+   const bool use_gen6 = (ilo_dev_gen(dev) < ILO_GEN(7));
+
+   if (use_gen6) {
+      view_init_for_texture_gen6(dev, tex, format, first_level, num_levels,
+            first_layer, num_layers, is_rt, surf);
+   } else {
+      view_init_for_texture_gen7(dev, tex, format, first_level, num_levels,
+            first_layer, num_layers, is_rt, surf);
+   }
+
+   /* the bo is borrowed; its reference count is not incremented */
+   surf->bo = tex->image.bo;
+
+   /* imported render targets are assumed to be scanouts */
+   if (tex->base.bind & PIPE_BIND_SCANOUT)
+      surf->scanout = true;
+   else if (tex->imported && (tex->base.bind & PIPE_BIND_RENDER_TARGET))
+      surf->scanout = true;
+   else
+      surf->scanout = false;
+}
+
+/**
+ * Fill in the Gen6 SAMPLER_BORDER_COLOR_STATE for \p color.
+ *
+ * The same border color is written in every representation held by the
+ * state: UNORM8 (DW0), IEEE_FP (DW1-4), FLOAT_16 (DW5-6), UNORM16 (DW7-8),
+ * SNORM16 (DW9-10), and SNORM8 (DW11).
+ *
+ * \param dev         device info; must be Gen6
+ * \param color       border color, read as four floats
+ * \param dw          output dwords
+ * \param num_dwords  number of dwords available in \p dw; at least 12
+ */
+static void
+sampler_init_border_color_gen6(const struct ilo_dev *dev,
+                               const union pipe_color_union *color,
+                               uint32_t *dw, int num_dwords)
+{
+   float rgba[4] = {
+      color->f[0], color->f[1], color->f[2], color->f[3],
+   };
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   assert(num_dwords >= 12);
+
+   /*
+    * This state is not documented in the Sandy Bridge PRM, but in the
+    * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
+    */
+
+   /*
+    * Every packed component below is converted to uint32_t and masked to
+    * its field width before being shifted into place.  Without the mask, a
+    * negative SNORM component is sign-extended by integer promotion and its
+    * high bits overwrite the components packed above it; without the
+    * unsigned type, shifting into bit 31 is undefined.
+    */
+
+   /* IEEE_FP */
+   dw[1] = fui(rgba[0]);
+   dw[2] = fui(rgba[1]);
+   dw[3] = fui(rgba[2]);
+   dw[4] = fui(rgba[3]);
+
+   /* FLOAT_16 */
+   dw[5] = ((uint32_t) util_float_to_half(rgba[0]) & 0xffff) |
+           ((uint32_t) util_float_to_half(rgba[1]) & 0xffff) << 16;
+   dw[6] = ((uint32_t) util_float_to_half(rgba[2]) & 0xffff) |
+           ((uint32_t) util_float_to_half(rgba[3]) & 0xffff) << 16;
+
+   /* clamp to [-1.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
+
+   /* SNORM16 */
+   dw[9] =  ((uint32_t) util_iround(rgba[0] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[1] * 32767.0f) & 0xffff) << 16;
+   dw[10] = ((uint32_t) util_iround(rgba[2] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[3] * 32767.0f) & 0xffff) << 16;
+
+   /* SNORM8 */
+   dw[11] = ((uint32_t) util_iround(rgba[0] * 127.0f) & 0xff) |
+            ((uint32_t) util_iround(rgba[1] * 127.0f) & 0xff) << 8 |
+            ((uint32_t) util_iround(rgba[2] * 127.0f) & 0xff) << 16 |
+            ((uint32_t) util_iround(rgba[3] * 127.0f) & 0xff) << 24;
+
+   /* clamp to [0.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
+
+   /* UNORM8 */
+   dw[0] = ((uint32_t) util_iround(rgba[0] * 255.0f) & 0xff) |
+           ((uint32_t) util_iround(rgba[1] * 255.0f) & 0xff) << 8 |
+           ((uint32_t) util_iround(rgba[2] * 255.0f) & 0xff) << 16 |
+           ((uint32_t) util_iround(rgba[3] * 255.0f) & 0xff) << 24;
+
+   /* UNORM16 */
+   dw[7] = ((uint32_t) util_iround(rgba[0] * 65535.0f) & 0xffff) |
+           ((uint32_t) util_iround(rgba[1] * 65535.0f) & 0xffff) << 16;
+   dw[8] = ((uint32_t) util_iround(rgba[2] * 65535.0f) & 0xffff) |
+           ((uint32_t) util_iround(rgba[3] * 65535.0f) & 0xffff) << 16;
+}
+
+/**
+ * Map a PIPE_TEX_MIPFILTER_x value to the corresponding GEN6_MIPFILTER_x
+ * value.  An unrecognized value asserts and maps to GEN6_MIPFILTER_NONE.
+ */
+static int
+gen6_translate_tex_mipfilter(unsigned filter)
+{
+   int hw_filter = GEN6_MIPFILTER_NONE;
+
+   if (filter == PIPE_TEX_MIPFILTER_NEAREST)
+      hw_filter = GEN6_MIPFILTER_NEAREST;
+   else if (filter == PIPE_TEX_MIPFILTER_LINEAR)
+      hw_filter = GEN6_MIPFILTER_LINEAR;
+   else if (filter != PIPE_TEX_MIPFILTER_NONE)
+      assert(!"unknown mipfilter");
+
+   return hw_filter;
+}
+
+/**
+ * Map a PIPE_TEX_FILTER_x value to the corresponding GEN6_MAPFILTER_x
+ * value.  An unrecognized value asserts and maps to GEN6_MAPFILTER_NEAREST.
+ */
+static int
+gen6_translate_tex_filter(unsigned filter)
+{
+   int hw_filter = GEN6_MAPFILTER_NEAREST;
+
+   if (filter == PIPE_TEX_FILTER_LINEAR)
+      hw_filter = GEN6_MAPFILTER_LINEAR;
+   else if (filter != PIPE_TEX_FILTER_NEAREST)
+      assert(!"unknown sampler filter");
+
+   return hw_filter;
+}
+
+/**
+ * Map a PIPE_TEX_WRAP_x coordinate wrapping mode to the corresponding
+ * hardware TEXCOORDMODE value.
+ *
+ * PIPE_TEX_WRAP_CLAMP always maps to GEN8_TEXCOORDMODE_HALF_BORDER here.
+ * The mirror-clamp modes are not translated: they assert, like any unknown
+ * value, and map to GEN6_TEXCOORDMODE_WRAP.
+ */
+static int
+gen6_translate_tex_wrap(unsigned wrap)
+{
+   int mode;
+
+   switch (wrap) {
+   case PIPE_TEX_WRAP_CLAMP:
+      mode = GEN8_TEXCOORDMODE_HALF_BORDER;
+      break;
+   case PIPE_TEX_WRAP_REPEAT:
+      mode = GEN6_TEXCOORDMODE_WRAP;
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      mode = GEN6_TEXCOORDMODE_CLAMP;
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      mode = GEN6_TEXCOORDMODE_MIRROR;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default:
+      assert(!"unknown sampler wrap mode");
+      mode = GEN6_TEXCOORDMODE_WRAP;
+      break;
+   }
+
+   return mode;
+}
+
+/**
+ * Map a PIPE_FUNC_x shadow compare function to the corresponding
+ * GEN6_COMPAREFUNCTION_x value.  An unrecognized value asserts and maps to
+ * GEN6_COMPAREFUNCTION_NEVER.
+ */
+static int
+gen6_translate_shadow_func(unsigned func)
+{
+   int hw_func;
+
+   /*
+    * The two conventions are mirrored and inverted with respect to each
+    * other:
+    *
+    *  - PIPE_FUNC_x has the reference value on the left-hand side of the
+    *    comparison and returns 1.0 when the comparison is true;
+    *  - GEN6_COMPAREFUNCTION_x has the reference value on the right-hand
+    *    side of the comparison and returns 0.0 when the comparison is true.
+    */
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
+      break;
+   case PIPE_FUNC_LESS:
+      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
+      break;
+   case PIPE_FUNC_EQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_LESS;
+      break;
+   case PIPE_FUNC_GREATER:
+      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_GREATER;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   default:
+      assert(!"unknown shadow compare function");
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Translate a pipe sampler state into an ilo sampler CSO.
+ *
+ * \p sampler is zeroed first.  The function then fills in
+ *
+ *  - payload[0..2] with the sampler DW0, DW1, and DW3 values that do not
+ *    depend on the texture being sampled;
+ *  - payload[3..] with the border color: the four raw dwords of
+ *    pipe_sampler_state::border_color on Gen7+, or the 12-dword state
+ *    written by sampler_init_border_color_gen6() on Gen6;
+ *  - dw_filter / dw_filter_aniso with the filter bits for the regular and
+ *    the anisotropic cases;
+ *  - dw_wrap / dw_wrap_1d / dw_wrap_cube with the address control bits for
+ *    regular, 1D, and cube surfaces;
+ *  - saturate_s/t/r when PIPE_TEX_WRAP_CLAMP has to be emulated with
+ *    TEXCOORDMODE_CLAMP_BORDER (pre-Gen8, linear minification filter).
+ *
+ * \param dev      device info; Gen6 to Gen8
+ * \param state    pipe sampler state to translate
+ * \param sampler  CSO to initialize
+ */
+void
+ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
+                         const struct pipe_sampler_state *state,
+                         struct ilo_sampler_cso *sampler)
+{
+   int mip_filter, min_filter, mag_filter, max_aniso;
+   int lod_bias, max_lod, min_lod;
+   int wrap_s, wrap_t, wrap_r, wrap_cube;
+   uint32_t dw0, dw1, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   memset(sampler, 0, sizeof(*sampler));
+
+   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
+   min_filter = gen6_translate_tex_filter(state->min_img_filter);
+   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
+
+   sampler->anisotropic = state->max_anisotropy;
+
+   /* ratios 2, 4, ..., 16 are encoded as 0, 1, ..., 7 */
+   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
+      max_aniso = state->max_anisotropy / 2 - 1;
+   else if (state->max_anisotropy > 16)
+      max_aniso = GEN6_ANISORATIO_16;
+   else
+      max_aniso = GEN6_ANISORATIO_2;
+
+   /*
+    * Here is how the hardware calculates per-pixel LOD, from my reading of
+    * the PRMs:
+    *
+    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
+    *     other ways.  The number of texels is measured using level
+    *     SurfMinLod.
+    *  2) Bias is added to LOD.
+    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
+    *     compared with Base to determine whether magnification or
+    *     minification is needed.  (if preclamp is disabled, LOD is compared
+    *     with Base before clamping)
+    *  4) If magnification is needed, or no mipmapping is requested, LOD is
+    *     set to floor(MinLod).
+    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
+    *
+    * With Gallium interface, Base is always zero and
+    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      const float scale = 256.0f;
+
+      /* [-16.0, 16.0) in S4.8 */
+      lod_bias = (int)
+         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
+      lod_bias &= 0x1fff;
+
+      /* [0.0, 14.0] in U4.8 */
+      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
+      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
+   }
+   else {
+      const float scale = 64.0f;
+
+      /* [-16.0, 16.0) in S4.6 */
+      lod_bias = (int)
+         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
+      lod_bias &= 0x7ff;
+
+      /* [0.0, 13.0] in U4.6 */
+      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
+      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
+   }
+
+   /*
+    * We want LOD to be clamped to determine magnification/minification, and
+    * get set to zero when it is magnification or when mipmapping is disabled.
+    * The hardware would set LOD to floor(MinLod) and that is a problem when
+    * MinLod is greater than or equal to 1.0f.
+    *
+    * With Base being zero, it is always minification when MinLod is non-zero.
+    * To achieve our goal, we just need to set MinLod to zero and set
+    * MagFilter to MinFilter when mipmapping is disabled.
+    */
+   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
+      min_lod = 0;
+      mag_filter = min_filter;
+   }
+
+   /* determine wrap s/t/r */
+   wrap_s = gen6_translate_tex_wrap(state->wrap_s);
+   wrap_t = gen6_translate_tex_wrap(state->wrap_t);
+   wrap_r = gen6_translate_tex_wrap(state->wrap_r);
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      /*
+       * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
+       * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
+       * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
+       * additionally clamping the texture coordinates to [0.0, 1.0].
+       *
+       * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
+       * clamping has to be taken care of in the shaders.  There are two
+       * filters here, but let the minification one have a say.
+       */
+      const bool clamp_is_to_edge =
+         (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+      if (clamp_is_to_edge) {
+         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
+            wrap_s = GEN6_TEXCOORDMODE_CLAMP;
+         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
+            wrap_t = GEN6_TEXCOORDMODE_CLAMP;
+         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
+            wrap_r = GEN6_TEXCOORDMODE_CLAMP;
+      } else {
+         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
+            wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+            sampler->saturate_s = true;
+         }
+         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
+            wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+            sampler->saturate_t = true;
+         }
+         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
+            wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+            sampler->saturate_r = true;
+         }
+      }
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
+    *
+    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
+    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
+    *      must have the same Address Control mode."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
+    *
+    *     "This field (Cube Surface Control Mode) must be set to
+    *      CUBECTRLMODE_PROGRAMMED"
+    *
+    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
+    * map filtering.
+    */
+   if (state->seamless_cube_map &&
+       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
+      wrap_cube = GEN6_TEXCOORDMODE_CUBE;
+   }
+   else {
+      wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
+   }
+
+   if (!state->normalized_coords) {
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
+       *
+       *     "The following state must be set as indicated if this field
+       *      (Non-normalized Coordinate Enable) is enabled:
+       *
+       *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
+       *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
+       *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
+       *      - Mag Mode Filter must be MAPFILTER_NEAREST or
+       *        MAPFILTER_LINEAR.
+       *      - Min Mode Filter must be MAPFILTER_NEAREST or
+       *        MAPFILTER_LINEAR.
+       *      - Mip Mode Filter must be MIPFILTER_NONE.
+       *      - Min LOD must be 0.
+       *      - Max LOD must be 0.
+       *      - MIP Count must be 0.
+       *      - Surface Min LOD must be 0.
+       *      - Texture LOD Bias must be 0."
+       *
+       * TEXCOORDMODE_HALF_BORDER can only be seen here on Gen8, where
+       * PIPE_TEX_WRAP_CLAMP is not lowered to another mode above.
+       */
+      assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
+             wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER ||
+             wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+      assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
+             wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER ||
+             wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+      assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
+             wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER ||
+             wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+
+      assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
+             mag_filter == GEN6_MAPFILTER_LINEAR);
+      assert(min_filter == GEN6_MAPFILTER_NEAREST ||
+             min_filter == GEN6_MAPFILTER_LINEAR);
+
+      /* work around a bug in util_blitter */
+      mip_filter = GEN6_MIPFILTER_NONE;
+
+      assert(mip_filter == GEN6_MIPFILTER_NONE);
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw0 = 1 << 28 |
+            mip_filter << 20 |
+            lod_bias << 1;
+
+      sampler->dw_filter = mag_filter << 17 |
+                           min_filter << 14;
+
+      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
+                                 GEN6_MAPFILTER_ANISOTROPIC << 14 |
+                                 1;
+
+      /* shift as unsigned: the MinLod field reaches bit 31 */
+      dw1 = (uint32_t) min_lod << 20 |
+            (uint32_t) max_lod << 8;
+
+      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
+         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
+
+      dw3 = max_aniso << 19;
+
+      /* round the coordinates for linear filtering */
+      if (min_filter != GEN6_MAPFILTER_NEAREST) {
+         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
+                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
+                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
+      }
+      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
+         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
+                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
+                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
+      }
+
+      if (!state->normalized_coords)
+         dw3 |= 1 << 10;
+
+      sampler->dw_wrap = wrap_s << 6 |
+                         wrap_t << 3 |
+                         wrap_r;
+
+      /*
+       * As noted in the classic i965 driver, the HW may still reference
+       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
+       * mode
+       */
+      sampler->dw_wrap_1d = wrap_s << 6 |
+                            GEN6_TEXCOORDMODE_WRAP << 3 |
+                            GEN6_TEXCOORDMODE_WRAP;
+
+      sampler->dw_wrap_cube = wrap_cube << 6 |
+                              wrap_cube << 3 |
+                              wrap_cube;
+
+      STATIC_ASSERT(Elements(sampler->payload) >= 7);
+
+      sampler->payload[0] = dw0;
+      sampler->payload[1] = dw1;
+      sampler->payload[2] = dw3;
+
+      /* the border color is stored as the raw dwords of the pipe state */
+      memcpy(&sampler->payload[3],
+            state->border_color.ui, sizeof(state->border_color.ui));
+   }
+   else {
+      dw0 = 1 << 28 |
+            mip_filter << 20 |
+            lod_bias << 3;
+
+      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
+         dw0 |= gen6_translate_shadow_func(state->compare_func);
+
+      sampler->dw_filter = (min_filter != mag_filter) << 27 |
+                           mag_filter << 17 |
+                           min_filter << 14;
+
+      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
+                                 GEN6_MAPFILTER_ANISOTROPIC << 14;
+
+      /* shift as unsigned: the MinLod field reaches bit 31 */
+      dw1 = (uint32_t) min_lod << 22 |
+            (uint32_t) max_lod << 12;
+
+      sampler->dw_wrap = wrap_s << 6 |
+                         wrap_t << 3 |
+                         wrap_r;
+
+      sampler->dw_wrap_1d = wrap_s << 6 |
+                            GEN6_TEXCOORDMODE_WRAP << 3 |
+                            GEN6_TEXCOORDMODE_WRAP;
+
+      sampler->dw_wrap_cube = wrap_cube << 6 |
+                              wrap_cube << 3 |
+                              wrap_cube;
+
+      dw3 = max_aniso << 19;
+
+      /* round the coordinates for linear filtering */
+      if (min_filter != GEN6_MAPFILTER_NEAREST) {
+         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
+                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
+                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
+      }
+      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
+         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
+                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
+                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
+      }
+
+      if (!state->normalized_coords)
+         dw3 |= 1;
+
+      STATIC_ASSERT(Elements(sampler->payload) >= 15);
+
+      sampler->payload[0] = dw0;
+      sampler->payload[1] = dw1;
+      sampler->payload[2] = dw3;
+
+      /* 12 dwords of border color state follow the three sampler dwords */
+      sampler_init_border_color_gen6(dev,
+            &state->border_color, &sampler->payload[3], 12);
+   }
+}
index 3383eaf247a353ae9a0af535e0a679fe666b29f5..02051299675ad783308918c1be381ebb87f5400e 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "core/ilo_state_3d.h"
 #include "util/u_draw.h"
 #include "util/u_pack_color.h"
 
 #include "ilo_draw.h"
 #include "ilo_state.h"
-#include "ilo_state_3d.h"
 #include "ilo_blit.h"
 #include "ilo_blitter.h"
 
index 9fa53050dff5a3b39196a6297b6a1c85010e38cd..bc6925db21738a30a0afd60d3fde52add632b36b 100644 (file)
 #define ILO_BUILDER_3D_TOP_H
 
 #include "genhw/genhw.h"
+#include "core/ilo_state_3d.h"
 #include "core/intel_winsys.h"
 
 #include "ilo_common.h"
 #include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
-#include "ilo_state_3d.h"
 #include "ilo_builder.h"
 
 static inline void
index 788a85887ddb990ddcd0b16bf4707ec647145d4a..8a2926c1945a66a2b130f6e89784a7e634d3b050 100644 (file)
 
 #include "genhw/genhw.h" /* for SBE setup */
 #include "tgsi/tgsi_parse.h"
+#include "core/ilo_state_3d.h"
 #include "core/intel_winsys.h"
 
 #include "shader/ilo_shader_internal.h"
 #include "ilo_builder.h"
 #include "ilo_state.h"
-#include "ilo_state_3d.h"
 #include "ilo_shader.h"
 
 struct ilo_shader_cache {
index 175e7c659d4a593b36d554f46559e0bb6743b3b3..45f85e79f4bcd3157d20d77b45ec77511e87d906 100644 (file)
@@ -25,6 +25,7 @@
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "core/ilo_state_3d.h"
 #include "util/u_dynarray.h"
 #include "util/u_helpers.h"
 #include "util/u_upload_mgr.h"
@@ -33,7 +34,6 @@
 #include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
-#include "ilo_state_3d.h"
 
 static void
 finalize_shader_states(struct ilo_state_vector *vec)
index 78f2053b07cadae8272b148fe869365c6f7ab9a6..fd0a3156ebc68f1333f31f27e5c3187e11f8c595 100644 (file)
 #ifndef ILO_STATE_H
 #define ILO_STATE_H
 
+#include "core/ilo_state_3d.h"
 #include "pipe/p_state.h"
 #include "util/u_dynarray.h"
 
 #include "ilo_common.h"
 
-/**
- * \see brw_context.h
- */
-#define ILO_MAX_DRAW_BUFFERS    8
-#define ILO_MAX_CONST_BUFFERS   (1 + 12)
-#define ILO_MAX_SAMPLER_VIEWS   16
-#define ILO_MAX_SAMPLERS        16
-#define ILO_MAX_SO_BINDINGS     64
-#define ILO_MAX_SO_BUFFERS      4
-#define ILO_MAX_VIEWPORTS       1
-
-#define ILO_MAX_SURFACES        256
-
 /**
  * States that we track.
  *
@@ -131,246 +119,7 @@ enum ilo_dirty_flags {
    ILO_DIRTY_ALL              = 0xffffffff,
 };
 
-struct intel_bo;
-struct ilo_buffer;
 struct ilo_context;
-struct ilo_shader_state;
-struct ilo_texture;
-
-struct ilo_vb_state {
-   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
-   uint32_t enabled_mask;
-};
-
-struct ilo_ib_state {
-   struct pipe_resource *buffer;
-   const void *user_buffer;
-   unsigned offset;
-   unsigned index_size;
-
-   /* these are not valid until the state is finalized */
-   struct pipe_resource *hw_resource;
-   unsigned hw_index_size;
-   /* an offset to be added to pipe_draw_info::start */
-   int64_t draw_start_offset;
-};
-
-struct ilo_ve_cso {
-   /* VERTEX_ELEMENT_STATE */
-   uint32_t payload[2];
-};
-
-struct ilo_ve_state {
-   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
-   unsigned count;
-
-   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
-   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
-   unsigned vb_count;
-
-   /* these are not valid until the state is finalized */
-   struct ilo_ve_cso edgeflag_cso;
-   bool last_cso_edgeflag;
-
-   struct ilo_ve_cso nosrc_cso;
-   bool prepend_nosrc_cso;
-};
-
-struct ilo_so_state {
-   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
-   unsigned count;
-   unsigned append_bitmask;
-
-   bool enabled;
-};
-
-struct ilo_viewport_cso {
-   /* matrix form */
-   float m00, m11, m22, m30, m31, m32;
-
-   /* guardband in NDC space */
-   float min_gbx, min_gby, max_gbx, max_gby;
-
-   /* viewport in screen space */
-   float min_x, min_y, min_z;
-   float max_x, max_y, max_z;
-};
-
-struct ilo_viewport_state {
-   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
-   unsigned count;
-
-   struct pipe_viewport_state viewport0;
-};
-
-struct ilo_scissor_state {
-   /* SCISSOR_RECT */
-   uint32_t payload[ILO_MAX_VIEWPORTS * 2];
-
-   struct pipe_scissor_state scissor0;
-};
-
-struct ilo_rasterizer_clip {
-   /* 3DSTATE_CLIP */
-   uint32_t payload[3];
-
-   uint32_t can_enable_guardband;
-};
-
-struct ilo_rasterizer_sf {
-   /* 3DSTATE_SF */
-   uint32_t payload[3];
-   uint32_t dw_msaa;
-
-   /* Global Depth Offset Constant/Scale/Clamp */
-   uint32_t dw_depth_offset_const;
-   uint32_t dw_depth_offset_scale;
-   uint32_t dw_depth_offset_clamp;
-
-   /* Gen8+ 3DSTATE_RASTER */
-   uint32_t dw_raster;
-};
-
-struct ilo_rasterizer_wm {
-   /* 3DSTATE_WM */
-   uint32_t payload[2];
-   uint32_t dw_msaa_rast;
-   uint32_t dw_msaa_disp;
-};
-
-struct ilo_rasterizer_state {
-   struct pipe_rasterizer_state state;
-
-   struct ilo_rasterizer_clip clip;
-   struct ilo_rasterizer_sf sf;
-   struct ilo_rasterizer_wm wm;
-};
-
-struct ilo_dsa_state {
-   /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
-   uint32_t payload[3];
-
-   uint32_t dw_blend_alpha;
-   uint32_t dw_ps_blend_alpha;
-   ubyte alpha_ref;
-};
-
-struct ilo_blend_cso {
-   /* BLEND_STATE */
-   uint32_t payload[2];
-
-   uint32_t dw_blend;
-   uint32_t dw_blend_dst_alpha_forced_one;
-};
-
-struct ilo_blend_state {
-   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
-
-   bool dual_blend;
-   bool alpha_to_coverage;
-
-   uint32_t dw_shared;
-   uint32_t dw_alpha_mod;
-   uint32_t dw_logicop;
-
-   /* a part of 3DSTATE_PS_BLEND */
-   uint32_t dw_ps_blend;
-   uint32_t dw_ps_blend_dst_alpha_forced_one;
-};
-
-struct ilo_sampler_cso {
-   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
-   uint32_t payload[15];
-
-   uint32_t dw_filter;
-   uint32_t dw_filter_aniso;
-   uint32_t dw_wrap;
-   uint32_t dw_wrap_1d;
-   uint32_t dw_wrap_cube;
-
-   bool anisotropic;
-   bool saturate_r;
-   bool saturate_s;
-   bool saturate_t;
-};
-
-struct ilo_sampler_state {
-   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
-};
-
-struct ilo_view_surface {
-   /* SURFACE_STATE */
-   uint32_t payload[13];
-   struct intel_bo *bo;
-
-   uint32_t scanout;
-};
-
-struct ilo_view_cso {
-   struct pipe_sampler_view base;
-
-   struct ilo_view_surface surface;
-};
-
-struct ilo_view_state {
-   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
-   unsigned count;
-};
-
-struct ilo_cbuf_cso {
-   struct pipe_resource *resource;
-   struct ilo_view_surface surface;
-
-   /*
-    * this CSO is not so constant because user buffer needs to be uploaded in
-    * finalize_constant_buffers()
-    */
-   const void *user_buffer;
-   unsigned user_buffer_size;
-};
-
-struct ilo_cbuf_state {
-   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
-   uint32_t enabled_mask;
-};
-
-struct ilo_resource_state {
-   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
-   unsigned count;
-};
-
-struct ilo_surface_cso {
-   struct pipe_surface base;
-
-   bool is_rt;
-   union {
-      struct ilo_view_surface rt;
-      struct ilo_zs_surface {
-         uint32_t payload[12];
-         uint32_t dw_aligned_8x4;
-
-         struct intel_bo *bo;
-         struct intel_bo *hiz_bo;
-         struct intel_bo *separate_s8_bo;
-      } zs;
-   } u;
-};
-
-struct ilo_fb_state {
-   struct pipe_framebuffer_state state;
-
-   struct ilo_view_surface null_rt;
-   struct ilo_zs_surface null_zs;
-
-   struct ilo_fb_blend_caps {
-      bool can_logicop;
-      bool can_blend;
-      bool can_alpha_test;
-      bool dst_alpha_forced_one;
-   } blend_caps[PIPE_MAX_COLOR_BUFS];
-
-   unsigned num_samples;
-};
 
 struct ilo_global_binding_cso {
    struct pipe_resource *resource;
@@ -396,10 +145,6 @@ struct ilo_global_binding {
    unsigned count;
 };
 
-struct ilo_shader_cso {
-   uint32_t payload[5];
-};
-
 struct ilo_state_vector {
    const struct pipe_draw_info *draw;
 
diff --git a/src/gallium/drivers/ilo/ilo_state_3d.h b/src/gallium/drivers/ilo/ilo_state_3d.h
deleted file mode 100644 (file)
index b504390..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_STATE_3D_H
-#define ILO_STATE_3D_H
-
-#include "genhw/genhw.h"
-#include "core/intel_winsys.h"
-
-#include "ilo_common.h"
-#include "ilo_state.h"
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-static inline int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
-   switch (target) {
-   case PIPE_BUFFER:
-      return GEN6_SURFTYPE_BUFFER;
-   case PIPE_TEXTURE_1D:
-   case PIPE_TEXTURE_1D_ARRAY:
-      return GEN6_SURFTYPE_1D;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_RECT:
-   case PIPE_TEXTURE_2D_ARRAY:
-      return GEN6_SURFTYPE_2D;
-   case PIPE_TEXTURE_3D:
-      return GEN6_SURFTYPE_3D;
-   case PIPE_TEXTURE_CUBE:
-   case PIPE_TEXTURE_CUBE_ARRAY:
-      return GEN6_SURFTYPE_CUBE;
-   default:
-      assert(!"unknown texture target");
-      return GEN6_SURFTYPE_BUFFER;
-   }
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
-                unsigned num_states,
-                const struct pipe_vertex_element *states,
-                struct ilo_ve_state *ve);
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
-                        struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
-                      int comp0, int comp1, int comp2, int comp3,
-                      struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
-                         const struct pipe_viewport_state *state,
-                         struct ilo_viewport_cso *vp);
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
-                    unsigned start_slot,
-                    unsigned num_states,
-                    const struct pipe_scissor_state *states,
-                    struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
-                         struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_state *rasterizer);
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
-                 const struct pipe_depth_stencil_alpha_state *state,
-                 struct ilo_dsa_state *dsa);
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
-                   const struct pipe_blend_state *state,
-                   struct ilo_blend_state *blend);
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
-                         const struct pipe_sampler_state *state,
-                         struct ilo_sampler_cso *sampler);
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
-                               unsigned width, unsigned height,
-                               unsigned depth, unsigned level,
-                               struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
-                                     const struct ilo_buffer *buf,
-                                     unsigned offset, unsigned size,
-                                     unsigned struct_size,
-                                     enum pipe_format elem_format,
-                                     bool is_rt, bool render_cache_rw,
-                                     struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
-                                      const struct ilo_texture *tex,
-                                      enum pipe_format format,
-                                      unsigned first_level,
-                                      unsigned num_levels,
-                                      unsigned first_layer,
-                                      unsigned num_layers,
-                                      bool is_rt,
-                                      struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
-                        const struct ilo_texture *tex,
-                        enum pipe_format format, unsigned level,
-                        unsigned first_layer, unsigned num_layers,
-                        struct ilo_zs_surface *zs);
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *vs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *gs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
-               const struct pipe_framebuffer_state *state,
-               struct ilo_fb_state *fb);
-
-#endif /* ILO_STATE_3D_H */
diff --git a/src/gallium/drivers/ilo/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/ilo_state_3d_bottom.c
deleted file mode 100644 (file)
index 13c1a7f..0000000
+++ /dev/null
@@ -1,2225 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "core/ilo_format.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-
-#include "ilo_context.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
-#include "ilo_state.h"
-#include "ilo_state_3d.h"
-
-static void
-rasterizer_init_clip(const struct ilo_dev *dev,
-                     const struct pipe_rasterizer_state *state,
-                     struct ilo_rasterizer_clip *clip)
-{  /*
-    * Translate the rasterizer state into three CLIP dwords (dw1..dw3), store
-    * them in clip->payload[0..2], and record in clip->can_enable_guardband
-    * whether the guard band test may be used with this state.
-    */
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   dw1 = GEN6_CLIP_DW1_STATISTICS;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
-       *
-       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
-       *      enabled only for the cases where the incoming primitive topology
-       *      into the clipper guaranteed to be Trilist."
-       *
-       * What does this mean?
-       */
-      dw1 |= 0 << 19 |
-             GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
-
-      if (ilo_dev_gen(dev) < ILO_GEN(8)) { /* winding/cull bits: Gen7 and Gen7.5 only */
-         if (state->front_ccw)
-            dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
-
-         switch (state->cull_face) {
-         case PIPE_FACE_NONE:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
-            break;
-         case PIPE_FACE_FRONT:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
-            break;
-         case PIPE_FACE_BACK:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
-            break;
-         case PIPE_FACE_FRONT_AND_BACK:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
-            break;
-         }
-      }
-   }
-
-   dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
-         GEN6_CLIP_DW2_XY_TEST_ENABLE |
-         state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
-         GEN6_CLIP_DW2_CLIPMODE_NORMAL;
-
-   if (state->clip_halfz)
-      dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
-   else
-      dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
-
-   if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip) /* not set here on Gen8 */
-      dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
-
-   if (state->flatshade_first) { /* provoking vertex selection for tri/line/trifan */
-      dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
-             0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
-             1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
-   }
-   else {
-      dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
-             1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
-             2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
-         0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT; /* 0x7ff == 2047 */
-
-   clip->payload[0] = dw1;
-   clip->payload[1] = dw2;
-   clip->payload[2] = dw3;
-
-   clip->can_enable_guardband = true;
-
-   /*
-    * There are several reasons that guard band test should be disabled
-    *
-    *  - GL wide points (to avoid partially visible object)
-    *  - GL wide or AA lines (to avoid partially visible object)
-    */
-   if (state->point_size_per_vertex || state->point_size > 1.0f)
-      clip->can_enable_guardband = false;
-   if (state->line_smooth || state->line_width > 1.0f)
-      clip->can_enable_guardband = false;
-}
-
-static void
-rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
-                                     const struct pipe_rasterizer_state *state,
-                                     struct ilo_rasterizer_sf *sf)
-{  /*
-    * Store the depth offset constant, scale and clamp terms of the
-    * rasterizer state in sf, each converted with fui().  Despite the _gen6
-    * suffix the device assertion below accepts Gen6 through Gen8.
-    */
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /*
-    * Scale the constant term.  The minimum representable value used by the HW
-    * is not large enough to be the minimum resolvable difference.
-    */
-   sf->dw_depth_offset_const = fui(state->offset_units * 2.0f);
-   sf->dw_depth_offset_scale = fui(state->offset_scale);
-   sf->dw_depth_offset_clamp = fui(state->offset_clamp);
-}
-
-static void
-rasterizer_init_sf_gen6(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_sf *sf)
-{  /*
-    * Build the SF dwords dw1..dw3 for Gen6 through Gen7.5 and store them in
-    * sf->payload[0..2].  Also fills sf->dw_msaa, the depth offset dwords
-    * (via rasterizer_init_sf_depth_offset_gen6()) and sf->dw_raster, which is
-    * always 0 on this path.
-    */
-   int line_width, point_width;
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "This bit (Statistics Enable) should be set whenever clipping is
-    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
-    *      should be cleared if clipping is disabled or Statistics Enable in
-    *      CLIP_STATE is clear."
-    */
-   dw1 = GEN7_SF_DW1_STATISTICS |
-         GEN7_SF_DW1_VIEWPORT_ENABLE;
-
-   /* XXX GEN6 path seems to work fine for GEN7, so the GEN7 branch below is
-    * disabled by the constant `false` in its condition */
-   if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
-       *
-       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
-       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
-       *      Depth Offset Enable Point) should be set whenever non zero depth
-       *      bias (Slope, Bias) values are used. Setting this bit may have
-       *      some degradation of performance for some workloads."
-       */
-      if (state->offset_tri || state->offset_line || state->offset_point) {
-         /* XXX need to scale offset_const according to the depth format */
-         dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
-
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
-                GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
-                GEN7_SF_DW1_DEPTH_OFFSET_POINT;
-      }
-   } else {
-      if (state->offset_tri)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
-      if (state->offset_line)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
-      if (state->offset_point)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
-   }
-
-   switch (state->fill_front) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
-      break;
-   }
-
-   switch (state->fill_back) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
-      break;
-   }
-
-   if (state->front_ccw)
-      dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
-
-   dw2 = 0;
-
-   if (state->line_smooth) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
-       *
-       *     "This field (Anti-aliasing Enable) must be disabled if any of the
-       *      render targets have integer (UINT or SINT) surface format."
-       *
-       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-       *
-       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
-       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
-       *
-       * TODO We do not check those yet.
-       */
-      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
-             GEN7_SF_DW2_AA_LINE_CAP_1_0;
-   }
-
-   switch (state->cull_face) {
-   case PIPE_FACE_NONE:
-      dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
-      break;
-   case PIPE_FACE_FRONT:
-      dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
-      break;
-   case PIPE_FACE_BACK:
-      dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
-      break;
-   case PIPE_FACE_FRONT_AND_BACK:
-      dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
-      break;
-   }
-
-   /*
-    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
-    * pixels in the minor direction.  We have to make the lines slightly
-    * thicker, 0.5 pixel on both sides, so that the pixels they intersect
-    * are considered part of the lines.
-    *
-    * Line width is in U3.7, hence the scale by 128 and the clamp to 1023.
-    */
-   line_width = (int)
-      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
-   line_width = CLAMP(line_width, 0, 1023);
-
-   /* use GIQ rules: a non-smooth width of exactly 1.0 (128) is written as 0 */
-   if (line_width == 128 && !state->line_smooth)
-      line_width = 0;
-
-   dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-
-   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
-      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
-
-   if (state->scissor)
-      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
-
-   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
-         GEN7_SF_DW3_SUBPIXEL_8BITS;
-
-   if (state->line_last_pixel)
-      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
-   if (state->flatshade_first) { /* provoking vertex selection for tri/line/trifan */
-      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   } else {
-      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   if (!state->point_size_per_vertex)
-      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
-   /* in U8.3, hence the scale by 8 and the clamp to 1..2047 */
-   point_width = (int) (state->point_size * 8.0f + 0.5f);
-   point_width = CLAMP(point_width, 1, 2047);
-
-   dw3 |= point_width;
-
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   sf->payload[0] = dw1;
-   sf->payload[1] = dw2;
-   sf->payload[2] = dw3;
-
-   if (state->multisample) {
-      sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
-       *
-       *     "Software must not program a value of 0.0 when running in
-       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
-       *      when multisampling rasterization is enabled."
-       */
-      if (!line_width) {
-         line_width = 128; /* 1.0f */
-
-         sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-      }
-   } else {
-      sf->dw_msaa = 0;
-   }
-
-   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-   /* 3DSTATE_RASTER is Gen8+ only */
-   sf->dw_raster = 0;
-}
-
-static uint32_t
-rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
-                              const struct pipe_rasterizer_state *state)
-{  /*
-    * Return a dword of GEN8_RASTER_DW1_* bits derived from the rasterizer
-    * state: winding, cull mode, smooth points, multisample, depth offset
-    * enables, front/back fill modes, AA lines, scissor and Z test.
-    * Asserted to run on Gen8 only.
-    */
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->front_ccw)
-      dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
-
-   switch (state->cull_face) {
-   case PIPE_FACE_NONE:
-      dw |= GEN8_RASTER_DW1_CULLMODE_NONE;
-      break;
-   case PIPE_FACE_FRONT:
-      dw |= GEN8_RASTER_DW1_CULLMODE_FRONT;
-      break;
-   case PIPE_FACE_BACK:
-      dw |= GEN8_RASTER_DW1_CULLMODE_BACK;
-      break;
-   case PIPE_FACE_FRONT_AND_BACK:
-      dw |= GEN8_RASTER_DW1_CULLMODE_BOTH;
-      break;
-   }
-
-   if (state->point_smooth)
-      dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
-
-   if (state->multisample)
-      dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
-
-   if (state->offset_tri)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
-   if (state->offset_line)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
-   if (state->offset_point)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
-
-   switch (state->fill_front) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_POINT;
-      break;
-   }
-
-   switch (state->fill_back) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw |= GEN8_RASTER_DW1_BACKFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw |= GEN8_RASTER_DW1_BACKFACE_POINT;
-      break;
-   }
-
-   if (state->line_smooth)
-      dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
-
-   if (state->scissor)
-      dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
-
-   if (state->depth_clip)
-      dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
-
-   return dw;
-}
-
-static void
-rasterizer_init_sf_gen8(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_sf *sf)
-{  /*
-    * Gen8 variant of the SF setup: stores dw1..dw3 in sf->payload[0..2],
-    * fills the depth offset dwords, sets sf->dw_msaa to 0 and takes
-    * sf->dw_raster from rasterizer_get_sf_raster_gen8().
-    */
-   int line_width, point_width;
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   /* in U3.7, hence the scale by 128 and the clamp to 1023 */
-   line_width = (int)
-      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
-   line_width = CLAMP(line_width, 0, 1023);
-
-   /* use GIQ rules: a non-smooth width of exactly 1.0 (128) is written as 0 */
-   if (line_width == 128 && !state->line_smooth)
-      line_width = 0;
-
-   /* in U8.3, hence the scale by 8 and the clamp to 1..2047 */
-   point_width = (int) (state->point_size * 8.0f + 0.5f);
-   point_width = CLAMP(point_width, 1, 2047);
-
-   dw1 = GEN7_SF_DW1_STATISTICS |
-         GEN7_SF_DW1_VIEWPORT_ENABLE;
-
-   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-   if (state->line_smooth)
-      dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
-
-   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
-         GEN7_SF_DW3_SUBPIXEL_8BITS |
-         point_width;
-
-   if (state->line_last_pixel)
-      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
-   if (state->flatshade_first) { /* provoking vertex selection for tri/line/trifan */
-      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   } else {
-      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   if (!state->point_size_per_vertex)
-      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
-   dw3 |= point_width; /* already part of dw3 since its initialization; no effect */
-
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   sf->payload[0] = dw1;
-   sf->payload[1] = dw2;
-   sf->payload[2] = dw3;
-
-   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-
-   sf->dw_msaa = 0;
-   sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
-}
-
-static void
-rasterizer_init_wm_gen6(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_wm *wm)
-{  /*
-    * Gen6 WM setup from the rasterizer state: wm->payload[0] receives dw5 and
-    * wm->payload[1] receives dw6.  The multisample rasterization and
-    * dispatch modes are kept apart in wm->dw_msaa_rast and wm->dw_msaa_disp.
-    */
-   uint32_t dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   /* only the FF unit states are set, as in GEN7 */
-
-   dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
-
-   /*
-    * assertion that makes sure
-    *
-    *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
-    *
-    * is valid
-    */
-   STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
-                 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
-   dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
-
-   if (state->bottom_edge_rule)
-      dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
-
-   wm->dw_msaa_rast =
-      (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
-   wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
-
-   STATIC_ASSERT(Elements(wm->payload) >= 2);
-   wm->payload[0] = dw5;
-   wm->payload[1] = dw6;
-}
-
-static void
-rasterizer_init_wm_gen7(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_wm *wm)
-{  /*
-    * Gen7/Gen7.5 WM setup from the rasterizer state: wm->payload[0] receives
-    * dw1 and wm->payload[1] receives dw2 (left at 0 here).  The multisample
-    * modes are kept apart in wm->dw_msaa_rast and wm->dw_msaa_disp.
-    */
-   uint32_t dw1, dw2;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   /*
-    * assertion that makes sure
-    *
-    *   dw1 |= wm->dw_msaa_rast;
-    *   dw2 |= wm->dw_msaa_disp;
-    *
-    * is valid
-    */
-   STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
-                 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
-   dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
-         GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-   dw2 = 0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
-   if (state->bottom_edge_rule)
-      dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
-   wm->dw_msaa_rast =
-      (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
-   wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
-
-   STATIC_ASSERT(Elements(wm->payload) >= 2);
-   wm->payload[0] = dw1;
-   wm->payload[1] = dw2;
-}
-
-static uint32_t
-rasterizer_get_wm_gen8(const struct ilo_dev *dev,
-                       const struct pipe_rasterizer_state *state)
-{  /*
-    * Return the rasterizer-derived WM dword for Gen8.  It is composed from
-    * the GEN7_WM_DW1_* bit definitions; no multisample mode is included.
-    */
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   dw = GEN7_WM_DW1_ZW_INTERP_PIXEL |
-        GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
-   if (state->bottom_edge_rule)
-      dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
-   return dw;
-}
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_state *rasterizer)
-{  /*
-    * Fill rasterizer->clip, rasterizer->wm and rasterizer->sf from the
-    * rasterizer state, choosing the WM/SF helpers by device generation.
-    * The clip part uses the same helper on every generation.
-    */
-   rasterizer_init_clip(dev, state, &rasterizer->clip);
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      memset(&rasterizer->wm, 0, sizeof(rasterizer->wm)); /* only payload[0] is set on Gen8 */
-      rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state);
-
-      rasterizer_init_sf_gen8(dev, state, &rasterizer->sf);
-   } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      rasterizer_init_wm_gen7(dev, state, &rasterizer->wm);
-      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf); /* SF helper is shared with Gen6 */
-   } else {
-      rasterizer_init_wm_gen6(dev, state, &rasterizer->wm);
-      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
-   }
-}
-
-static void
-fs_init_cso_gen6(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{  /*
-    * Gen6 fragment shader setup: query kernel parameters of fs, build dw2,
-    * dw4, dw5 and dw6, and store them in cso->payload[0..3] in that order.
-    */
-   int start_grf, input_count, sampler_count, interps, max_threads;
-   uint32_t dw2, dw4, dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-   interps = ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-
-   /* see brwCreateContext() */
-   max_threads = (dev->gt == 2) ? 80 : 40;
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; /* constant condition: always 0 */
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; /* count rounded up to units of 4 */
-
-   dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
-    *      PS kernel or color calculator has the ability to kill (discard)
-    *      pixels or samples, other than due to depth or stencil testing.
-    *      This bit is required to be ENABLED in the following situations:
-    *
-    *      The API pixel shader program contains "killpix" or "discard"
-    *      instructions, or other code in the pixel shader kernel that can
-    *      cause the final pixel mask to differ from the pixel mask received
-    *      on dispatch.
-    *
-    *      A sampler with chroma key enabled with kill pixel mode is used by
-    *      the pixel shader.
-    *
-    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
-    *      enabled.
-    *
-    *      The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware and
-    *      therefore not via PS instructions, there should be no need to
-    *      ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
-    *      field must be set to disabled."
-    *
-    * TODO This is not checked yet.
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw5 |= GEN6_WM_DW5_PS_USE_W;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    *
-    * Until then the bit is set unconditionally.
-    */
-   if (true)
-      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
-
-   dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
-         GEN6_WM_DW6_PS_POSOFFSET_NONE |
-         interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = dw6;
-}
-
-static uint32_t
-fs_get_wm_gen7(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{  /*
-    * Return the shader-derived WM dword for Gen7/Gen7.5: barycentric
-    * interpolation modes, dispatch enable, and the kill / computed depth /
-    * depth input / W input bits taken from the kernel parameters of fs.
-    */
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   dw = ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
-      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    *
-    * Until then the bit is set unconditionally.
-    */
-   dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
-    *      the PS kernel or color calculator has the ability to kill
-    *      (discard) pixels or samples, other than due to depth or stencil
-    *      testing. This bit is required to be ENABLED in the following
-    *      situations:
-    *
-    *      - The API pixel shader program contains "killpix" or "discard"
-    *        instructions, or other code in the pixel shader kernel that
-    *        can cause the final pixel mask to differ from the pixel mask
-    *        received on dispatch.
-    *
-    *      - A sampler with chroma key enabled with kill pixel mode is used
-    *        by the pixel shader.
-    *
-    *      - Any render target has Alpha Test Enable or AlphaToCoverage
-    *        Enable enabled.
-    *
-    *      - The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware
-    *      and therefore not via PS instructions, there should be no need
-    *      to ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN7_WM_DW1_PSCDEPTH_ON;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN7_WM_DW1_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN7_WM_DW1_PS_USE_W;
-
-   return dw;
-}
-
-static void
-fs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{  /*
-    * Gen7/Gen7.5 fragment shader setup: cso->payload[0..2] receive dw2, dw4
-    * and dw5, and cso->payload[3] receives the dword from fs_get_wm_gen7().
-    */
-   int start_grf, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; /* constant condition: always 0 */
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; /* count rounded up to units of 4 */
-
-   dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
-
-   /* see brwCreateContext(); the thread limit depends on generation and dev->gt */
-   switch (ilo_dev_gen(dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
-      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
-      break;
-   case ILO_GEN(7):
-   default:
-      max_threads = (dev->gt == 2) ? 172 : 48;
-      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   }
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = fs_get_wm_gen7(dev, fs);
-}
-
-static uint32_t
-fs_get_psx_gen8(const struct ilo_dev *dev,
-                const struct ilo_shader_state *fs)
-{  /*
-    * Return a dword of GEN8_PSX_DW1_* bits for Gen8: dispatch enable is
-    * always set; the remaining bits follow the kernel parameters of fs.
-    */
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   dw = GEN8_PSX_DW1_DISPATCH_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN8_PSX_DW1_KILL_PIXEL;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN8_PSX_DW1_PSCDEPTH_ON;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN8_PSX_DW1_USE_DEPTH;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN8_PSX_DW1_USE_W;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw |= GEN8_PSX_DW1_ATTR_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-fs_get_wm_gen8(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{  /*
-    * Return the shader-derived WM dword for Gen8: only the barycentric
-    * interpolation modes of fs, shifted with the GEN7_WM_DW1 shift.
-    */
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   return ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
-      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-}
-
-static void
-fs_init_cso_gen8(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{  /*
-    * Gen8 fragment shader setup: cso->payload[0..2] receive dw3, dw6 and
-    * dw7; payload[3] and payload[4] receive the dwords from
-    * fs_get_psx_gen8() and fs_get_wm_gen8().
-    */
-   int start_grf, sampler_count;
-   uint32_t dw3, dw6, dw7;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; /* constant condition: always 0 */
-   dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; /* count rounded up to units of 4 */
-
-   /* always 64?  (note the field is written as 64 - 2, not 64 - 1) */
-   dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
-         GEN8_PS_DW6_POSOFFSET_NONE;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
-
-   dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 5);
-   cso->payload[0] = dw3;
-   cso->payload[1] = dw6;
-   cso->payload[2] = dw7;
-   cso->payload[3] = fs_get_psx_gen8(dev, fs);
-   cso->payload[4] = fs_get_wm_gen8(dev, fs);
-}
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    struct ilo_shader_cso *cso)
-{  /*
-    * Initialize cso for the fragment shader fs by dispatching to the Gen8,
-    * Gen7 or Gen6 helper according to the device generation.
-    */
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      fs_init_cso_gen8(dev, fs, cso);
-   else if (ilo_dev_gen(dev) >= ILO_GEN(7))
-      fs_init_cso_gen7(dev, fs, cso);
-   else
-      fs_init_cso_gen6(dev, fs, cso);
-}
-
-struct ilo_zs_surface_info { /* scratch description filled by zs_init_info_null() and zs_init_info() */
-   int surface_type; /* a GEN6_SURFTYPE_* value */
-   int format; /* a GEN6_ZFORMAT_* value */
-
-   struct {
-      struct intel_bo *bo;
-      unsigned stride;
-      unsigned qpitch;
-      enum gen_surface_tiling tiling;
-      uint32_t offset;
-   } zs, stencil, hiz; /* same set of buffer parameters, kept three times */
-
-   unsigned width, height, depth;
-   unsigned lod, first_layer, num_layers;
-};
-
-static void
-zs_init_info_null(const struct ilo_dev *dev,
-                  struct ilo_zs_surface_info *info)
-{  /*
-    * Reset info to describe a null surface: everything is zeroed, then the
-    * type is set to GEN6_SURFTYPE_NULL, the format to D32_FLOAT, and the
-    * width, height, depth and layer count to 1.
-    */
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(info, 0, sizeof(*info));
-
-   info->surface_type = GEN6_SURFTYPE_NULL;
-   info->format = GEN6_ZFORMAT_D32_FLOAT;
-   info->width = 1;
-   info->height = 1;
-   info->depth = 1;
-   info->num_layers = 1;
-}
-
-static void
-zs_init_info(const struct ilo_dev *dev,
-             const struct ilo_texture *tex,
-             enum pipe_format format, unsigned level,
-             unsigned first_layer, unsigned num_layers,
-             struct ilo_zs_surface_info *info)
-{
-   bool separate_stencil;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(info, 0, sizeof(*info));
-
-   info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
-
-   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
-       *
-       *     "For Other Surfaces (Cube Surfaces):
-       *      This field (Minimum Array Element) is ignored."
-       *
-       *     "For Other Surfaces (Cube Surfaces):
-       *      This field (Render Target View Extent) is ignored."
-       *
-       * As such, we cannot set first_layer and num_layers on cube surfaces.
-       * To work around that, treat it as a 2D surface.
-       */
-      info->surface_type = GEN6_SURFTYPE_2D;
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      separate_stencil = true;
-   }
-   else {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-       *
-       *     "This field (Separate Stencil Buffer Enable) must be set to the
-       *      same value (enabled or disabled) as Hierarchical Depth Buffer
-       *      Enable."
-       */
-      separate_stencil =
-         ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-    *
-    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
-    *      Surface Format of the depth buffer cannot be
-    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
-    *      requires the separate stencil buffer."
-    *
-    * From the Ironlake PRM, volume 2 part 1, page 330:
-    *
-    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
-    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
-    *
-    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
-    * is indeed used, the depth values output by the fragment shaders will
-    * be different when read back.
-    *
-    * As for GEN7+, separate_stencil is always true.
-    */
-   switch (format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      info->format = GEN6_ZFORMAT_D16_UNORM;
-      break;
-   case PIPE_FORMAT_Z32_FLOAT:
-      info->format = GEN6_ZFORMAT_D32_FLOAT;
-      break;
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-      info->format = (separate_stencil) ?
-         GEN6_ZFORMAT_D24_UNORM_X8_UINT :
-         GEN6_ZFORMAT_D24_UNORM_S8_UINT;
-      break;
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-      info->format = (separate_stencil) ?
-         GEN6_ZFORMAT_D32_FLOAT :
-         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
-      break;
-   case PIPE_FORMAT_S8_UINT:
-      if (separate_stencil) {
-         info->format = GEN6_ZFORMAT_D32_FLOAT;
-         break;
-      }
-      /* fall through */
-   default:
-      assert(!"unsupported depth/stencil format");
-      zs_init_info_null(dev, info);
-      return;
-      break;
-   }
-
-   if (format != PIPE_FORMAT_S8_UINT) {
-      info->zs.bo = tex->image.bo;
-      info->zs.stride = tex->image.bo_stride;
-
-      assert(tex->image.layer_height % 4 == 0);
-      info->zs.qpitch = tex->image.layer_height / 4;
-
-      info->zs.tiling = tex->image.tiling;
-      info->zs.offset = 0;
-   }
-
-   if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
-      const struct ilo_texture *s8_tex =
-         (tex->separate_s8) ? tex->separate_s8 : tex;
-
-      info->stencil.bo = s8_tex->image.bo;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
-       *
-       *     "The pitch must be set to 2x the value computed based on width,
-       *       as the stencil buffer is stored with two rows interleaved."
-       *
-       * For GEN7, we still double the stride because we did not double the
-       * slice widths when initializing the layout.
-       */
-      info->stencil.stride = s8_tex->image.bo_stride * 2;
-
-      assert(s8_tex->image.layer_height % 4 == 0);
-      info->stencil.qpitch = s8_tex->image.layer_height / 4;
-
-      info->stencil.tiling = s8_tex->image.tiling;
-
-      if (ilo_dev_gen(dev) == ILO_GEN(6)) {
-         unsigned x, y;
-
-         assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD);
-
-         /* offset to the level */
-         ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y);
-         ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y);
-         info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y);
-      }
-   }
-
-   if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) {
-      info->hiz.bo = tex->image.aux_bo;
-      info->hiz.stride = tex->image.aux_stride;
-
-      assert(tex->image.aux_layer_height % 4 == 0);
-      info->hiz.qpitch = tex->image.aux_layer_height / 4;
-
-      info->hiz.tiling = GEN6_TILING_Y;
-
-      /* offset to the level */
-      if (ilo_dev_gen(dev) == ILO_GEN(6))
-         info->hiz.offset = tex->image.aux_offsets[level];
-   }
-
-   info->width = tex->image.width0;
-   info->height = tex->image.height0;
-   info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
-      tex->base.depth0 : num_layers;
-
-   info->lod = level;
-   info->first_layer = first_layer;
-   info->num_layers = num_layers;
-}
-
-/**
- * Fill in the depth, separate stencil, and HiZ buffer dwords of zs for the
- * given view of tex.  When tex is NULL, zs_init_info_null() supplies the
- * surface info instead.  The bo pointers are stored in zs without taking
- * references.
- */
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
-                        const struct ilo_texture *tex,
-                        enum pipe_format format, unsigned level,
-                        unsigned first_layer, unsigned num_layers,
-                        struct ilo_zs_surface *zs)
-{
-   const bool gen7_plus = (ilo_dev_gen(dev) >= ILO_GEN(7));
-   const int max_2d_size = (gen7_plus) ? 16384 : 8192;
-   const int max_array_size = (gen7_plus) ? 2048 : 512;
-   struct ilo_zs_surface_info info;
-   uint32_t depth_dw[6];
-   uint32_t stencil_dw[3] = { 0, 0, 0 };
-   uint32_t hiz_dw[3] = { 0, 0, 0 };
-   int w_align = 8, h_align = 4;
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   if (!tex) {
-      zs_init_info_null(dev, &info);
-   } else {
-      zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
-
-      /* the 8x4 alignment is divided down according to the sample count */
-      switch (tex->base.nr_samples) {
-      case 2:
-         w_align = 4;
-         break;
-      case 4:
-         w_align = 4;
-         h_align = 2;
-         break;
-      case 8:
-         w_align = 2;
-         h_align = 2;
-         break;
-      case 16:
-         w_align = 2;
-         h_align = 1;
-         break;
-      default:
-         break;
-      }
-   }
-
-   /* sanity-check the view against the per-type limits */
-   switch (info.surface_type) {
-   case GEN6_SURFTYPE_NULL:
-      break;
-   case GEN6_SURFTYPE_1D:
-      assert(info.width <= max_2d_size && info.height == 1 &&
-             info.depth <= max_array_size);
-      assert(info.first_layer < max_array_size - 1 &&
-             info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
-             info.depth <= max_array_size);
-      assert(info.first_layer < max_array_size - 1 &&
-             info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
-      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
-             info.depth == 1);
-      assert(info.first_layer == 0 && info.num_layers == 1);
-      assert(info.width == info.height);
-      break;
-   default:
-      assert(!"unexpected depth surface type");
-      break;
-   }
-
-   /* depth buffer */
-   depth_dw[0] = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
-                 info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
-   depth_dw[1] = 0;
-
-   if (info.zs.bo) {
-      /* required for GEN6+ */
-      assert(info.zs.tiling == GEN6_TILING_Y);
-      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
-            info.zs.stride % 128 == 0);
-      assert(info.width <= info.zs.stride);
-
-      depth_dw[0] |= (info.zs.stride - 1);
-      depth_dw[1] = info.zs.offset;
-   }
-
-   if (gen7_plus) {
-      if (info.zs.bo)
-         depth_dw[0] |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
-
-      if (info.stencil.bo)
-         depth_dw[0] |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
-
-      if (info.hiz.bo)
-         depth_dw[0] |= GEN7_DEPTH_DW1_HIZ_ENABLE;
-
-      depth_dw[2] = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
-                    (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
-                    info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
-      zs->dw_aligned_8x4 =
-         (align(info.height, h_align) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
-         (align(info.width, w_align) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
-         info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
-      depth_dw[3] = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
-                    info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
-
-      depth_dw[4] = 0;
-
-      depth_dw[5] =
-         (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
-
-      if (ilo_dev_gen(dev) >= ILO_GEN(8))
-         depth_dw[5] |= info.zs.qpitch;
-   } else {
-      /* always Y-tiled */
-      depth_dw[0] |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
-
-      if (info.hiz.bo) {
-         depth_dw[0] |= GEN6_DEPTH_DW1_HIZ_ENABLE |
-                        GEN6_DEPTH_DW1_SEPARATE_STENCIL;
-      }
-
-      depth_dw[2] = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
-                    (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
-                    info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
-                    GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
-      zs->dw_aligned_8x4 =
-         (align(info.height, h_align) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
-         (align(info.width, w_align) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
-         info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
-         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
-      depth_dw[3] = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
-                    info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
-                    (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
-
-      depth_dw[4] = 0;
-
-      depth_dw[5] = 0;
-   }
-
-   /* separate stencil */
-   if (info.stencil.bo) {
-      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
-             info.stencil.stride % 128 == 0);
-
-      stencil_dw[0] =
-         (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
-      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
-         stencil_dw[0] |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
-
-      stencil_dw[1] = info.stencil.offset;
-      stencil_dw[2] = info.stencil.qpitch;
-   }
-
-   /* hiz */
-   if (info.hiz.bo) {
-      hiz_dw[0] = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
-      hiz_dw[1] = info.hiz.offset;
-      hiz_dw[2] = info.hiz.qpitch;
-   }
-
-   STATIC_ASSERT(Elements(zs->payload) >= 12);
-
-   /* payload layout: 6 depth dwords, then 3 stencil dwords, then 3 hiz */
-   for (i = 0; i < 6; i++)
-      zs->payload[i] = depth_dw[i];
-   for (i = 0; i < 3; i++)
-      zs->payload[6 + i] = stencil_dw[i];
-   for (i = 0; i < 3; i++)
-      zs->payload[9 + i] = hiz_dw[i];
-
-   /* do not increment reference counts */
-   zs->bo = info.zs.bo;
-   zs->separate_s8_bo = info.stencil.bo;
-   zs->hiz_bo = info.hiz.bo;
-}
-
-/**
- * Compute the guardband rectangle, in screen space, for a viewport centered
- * at (center_x, center_y).
- */
-static void
-viewport_get_guardband(const struct ilo_dev *dev,
-                       int center_x, int center_y,
-                       int *min_gbx, int *max_gbx,
-                       int *min_gby, int *max_gby)
-{
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): 16K"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): N/A"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * Combined, the bounding box of any object can not exceed 8K in both
-    * width and height.
-    *
-    * Below we set the guardband as a square of length 8K, centered at where
-    * the viewport is.  This makes sure all objects passing the GB test are
-    * valid to the renderer, and those failing the XY clipping have a
-    * better chance of passing the GB test.
-    */
-   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
-   const int half_len = 8192 / 2;
-   const int lowest_center = -max_extent + half_len;
-   const int highest_center = max_extent - half_len;
-
-   /* move the center so that the guardband stays within the valid range */
-   if (center_x - half_len < -max_extent) {
-      center_x = lowest_center;
-   } else if (center_x + half_len > max_extent - 1) {
-      center_x = highest_center;
-   }
-
-   if (center_y - half_len < -max_extent) {
-      center_y = lowest_center;
-   } else if (center_y + half_len > max_extent - 1) {
-      center_y = highest_center;
-   }
-
-   *min_gbx = center_x - half_len;
-   *min_gby = center_y - half_len;
-   *max_gbx = center_x + half_len;
-   *max_gby = center_y + half_len;
-}
-
-/**
- * Initialize vp from a pipe viewport state: the scale/translate matrix
- * elements, the guardband in NDC space, and the viewport bounds in screen
- * space.
- */
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
-                         const struct pipe_viewport_state *state,
-                         struct ilo_viewport_cso *vp)
-{
-   const float tx = state->translate[0];
-   const float ty = state->translate[1];
-   const float tz = state->translate[2];
-   const float abs_sx = fabs(state->scale[0]);
-   const float abs_sy = fabs(state->scale[1]);
-   const float abs_sz = fabs(state->scale[2]);
-   int gb_x0, gb_x1, gb_y0, gb_y1;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /* the guardband is centered at the (truncated) viewport center */
-   viewport_get_guardband(dev, (int) tx, (int) ty,
-         &gb_x0, &gb_x1, &gb_y0, &gb_y1);
-
-   /* matrix form */
-   vp->m00 = state->scale[0];
-   vp->m11 = state->scale[1];
-   vp->m22 = state->scale[2];
-   vp->m30 = tx;
-   vp->m31 = ty;
-   vp->m32 = tz;
-
-   /* guardband in NDC space */
-   vp->min_gbx = ((float) gb_x0 - tx) / abs_sx;
-   vp->max_gbx = ((float) gb_x1 - tx) / abs_sx;
-   vp->min_gby = ((float) gb_y0 - ty) / abs_sy;
-   vp->max_gby = ((float) gb_y1 - ty) / abs_sy;
-
-   /* viewport in screen space */
-   vp->min_x = abs_sx * -1.0f + tx;
-   vp->max_x = abs_sx *  1.0f + tx;
-   vp->min_y = abs_sy * -1.0f + ty;
-   vp->max_y = abs_sy *  1.0f + ty;
-   vp->min_z = abs_sz * -1.0f + tz;
-   vp->max_z = abs_sz *  1.0f + tz;
-}
-
-/**
- * Map a PIPE_LOGICOP_x value to the corresponding GEN6_LOGICOP_x value.
- * Unknown values assert and map to GEN6_LOGICOP_CLEAR.
- */
-static int
-gen6_translate_pipe_logicop(unsigned logicop)
-{
-   int hw_op;
-
-   switch (logicop) {
-   case PIPE_LOGICOP_CLEAR:
-      hw_op = GEN6_LOGICOP_CLEAR;
-      break;
-   case PIPE_LOGICOP_NOR:
-      hw_op = GEN6_LOGICOP_NOR;
-      break;
-   case PIPE_LOGICOP_AND_INVERTED:
-      hw_op = GEN6_LOGICOP_AND_INVERTED;
-      break;
-   case PIPE_LOGICOP_COPY_INVERTED:
-      hw_op = GEN6_LOGICOP_COPY_INVERTED;
-      break;
-   case PIPE_LOGICOP_AND_REVERSE:
-      hw_op = GEN6_LOGICOP_AND_REVERSE;
-      break;
-   case PIPE_LOGICOP_INVERT:
-      hw_op = GEN6_LOGICOP_INVERT;
-      break;
-   case PIPE_LOGICOP_XOR:
-      hw_op = GEN6_LOGICOP_XOR;
-      break;
-   case PIPE_LOGICOP_NAND:
-      hw_op = GEN6_LOGICOP_NAND;
-      break;
-   case PIPE_LOGICOP_AND:
-      hw_op = GEN6_LOGICOP_AND;
-      break;
-   case PIPE_LOGICOP_EQUIV:
-      hw_op = GEN6_LOGICOP_EQUIV;
-      break;
-   case PIPE_LOGICOP_NOOP:
-      hw_op = GEN6_LOGICOP_NOOP;
-      break;
-   case PIPE_LOGICOP_OR_INVERTED:
-      hw_op = GEN6_LOGICOP_OR_INVERTED;
-      break;
-   case PIPE_LOGICOP_COPY:
-      hw_op = GEN6_LOGICOP_COPY;
-      break;
-   case PIPE_LOGICOP_OR_REVERSE:
-      hw_op = GEN6_LOGICOP_OR_REVERSE;
-      break;
-   case PIPE_LOGICOP_OR:
-      hw_op = GEN6_LOGICOP_OR;
-      break;
-   case PIPE_LOGICOP_SET:
-      hw_op = GEN6_LOGICOP_SET;
-      break;
-   default:
-      assert(!"unknown logicop function");
-      hw_op = GEN6_LOGICOP_CLEAR;
-      break;
-   }
-
-   return hw_op;
-}
-
-/**
- * Map a PIPE_BLEND_x value to the corresponding GEN6_BLENDFUNCTION_x value.
- * Unknown values assert and map to GEN6_BLENDFUNCTION_ADD.
- */
-static int
-gen6_translate_pipe_blend(unsigned blend)
-{
-   int hw_func;
-
-   switch (blend) {
-   case PIPE_BLEND_ADD:
-      hw_func = GEN6_BLENDFUNCTION_ADD;
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      hw_func = GEN6_BLENDFUNCTION_SUBTRACT;
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      hw_func = GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
-      break;
-   case PIPE_BLEND_MIN:
-      hw_func = GEN6_BLENDFUNCTION_MIN;
-      break;
-   case PIPE_BLEND_MAX:
-      hw_func = GEN6_BLENDFUNCTION_MAX;
-      break;
-   default:
-      assert(!"unknown blend function");
-      hw_func = GEN6_BLENDFUNCTION_ADD;
-      break;
-   }
-
-   return hw_func;
-}
-
-/**
- * Map a PIPE_BLENDFACTOR_x value to the corresponding GEN6_BLENDFACTOR_x
- * value.  Unknown values assert and map to GEN6_BLENDFACTOR_ONE.
- */
-static int
-gen6_translate_pipe_blendfactor(unsigned blendfactor)
-{
-   int hw_factor;
-
-   switch (blendfactor) {
-   case PIPE_BLENDFACTOR_ONE:
-      hw_factor = GEN6_BLENDFACTOR_ONE;
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_SRC_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_DST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_DST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_CONST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_CONST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_SRC1_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_SRC1_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      hw_factor = GEN6_BLENDFACTOR_ZERO;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_DST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_DST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_CONST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_CONST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
-      break;
-   default:
-      assert(!"unknown blend factor");
-      hw_factor = GEN6_BLENDFACTOR_ONE;
-      break;
-   }
-
-   return hw_factor;
-}
-
-/**
- * Map a PIPE_STENCIL_OP_x value to the corresponding GEN6_STENCILOP_x value.
- * Note that pipe INCR/DECR map to the saturating hardware ops, while pipe
- * INCR_WRAP/DECR_WRAP map to hardware INCR/DECR.  Unknown values assert and
- * map to GEN6_STENCILOP_KEEP.
- */
-static int
-gen6_translate_pipe_stencil_op(unsigned stencil_op)
-{
-   int hw_op;
-
-   switch (stencil_op) {
-   case PIPE_STENCIL_OP_KEEP:
-      hw_op = GEN6_STENCILOP_KEEP;
-      break;
-   case PIPE_STENCIL_OP_ZERO:
-      hw_op = GEN6_STENCILOP_ZERO;
-      break;
-   case PIPE_STENCIL_OP_REPLACE:
-      hw_op = GEN6_STENCILOP_REPLACE;
-      break;
-   case PIPE_STENCIL_OP_INCR:
-      hw_op = GEN6_STENCILOP_INCRSAT;
-      break;
-   case PIPE_STENCIL_OP_DECR:
-      hw_op = GEN6_STENCILOP_DECRSAT;
-      break;
-   case PIPE_STENCIL_OP_INCR_WRAP:
-      hw_op = GEN6_STENCILOP_INCR;
-      break;
-   case PIPE_STENCIL_OP_DECR_WRAP:
-      hw_op = GEN6_STENCILOP_DECR;
-      break;
-   case PIPE_STENCIL_OP_INVERT:
-      hw_op = GEN6_STENCILOP_INVERT;
-      break;
-   default:
-      assert(!"unknown stencil op");
-      hw_op = GEN6_STENCILOP_KEEP;
-      break;
-   }
-
-   return hw_op;
-}
-
-/**
- * Return the blend factor to use in place of \p factor when it must not read
- * the destination alpha: DST_ALPHA becomes ONE, INV_DST_ALPHA and
- * SRC_ALPHA_SATURATE become ZERO, and any other factor is returned as is.
- */
-static int
-gen6_blend_factor_dst_alpha_forced_one(int factor)
-{
-   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
-      return GEN6_BLENDFACTOR_ONE;
-
-   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA ||
-       factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
-      return GEN6_BLENDFACTOR_ZERO;
-
-   return factor;
-}
-
-/**
- * Return the blend-enable dword (functions and factors) of a render target
- * for GEN6 to GEN7.5, or 0 when blending is disabled for it.  When
- * dst_alpha_forced_one is set, every factor is first passed through
- * gen6_blend_factor_dst_alpha_forced_one().
- */
-static uint32_t
-blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-   bool independent_alpha;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!rt->blend_enable)
-      return 0;
-
-   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
-      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
-      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
-      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
-   }
-
-   /* alpha differs from color in either the function or the factors */
-   independent_alpha = (rt->rgb_func != rt->alpha_func ||
-                        src_rgb != src_a ||
-                        dst_rgb != dst_a);
-
-   dw = GEN6_RT_DW0_BLEND_ENABLE |
-        gen6_translate_pipe_blend(rt->alpha_func) << 26 |
-        src_a << 20 |
-        dst_a << 15 |
-        gen6_translate_pipe_blend(rt->rgb_func) << 11 |
-        src_rgb << 5 |
-        dst_rgb;
-
-   if (independent_alpha)
-      dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-   return dw;
-}
-
-/**
- * Return the blend-enable dword (functions and factors) of a render target
- * for GEN8, or 0 when blending is disabled for it.  *independent_alpha is
- * always written: it is set when the alpha function or factors differ from
- * the color ones, and cleared when blending is disabled.  When
- * dst_alpha_forced_one is set, every factor is first passed through
- * gen6_blend_factor_dst_alpha_forced_one().
- */
-static uint32_t
-blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one,
-                               bool *independent_alpha)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   *independent_alpha = false;
-   if (!rt->blend_enable)
-      return 0;
-
-   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
-      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
-      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
-      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
-   }
-
-   if (rt->rgb_func != rt->alpha_func ||
-       src_rgb != src_a ||
-       dst_rgb != dst_a)
-      *independent_alpha = true;
-
-   return GEN8_RT_DW0_BLEND_ENABLE |
-          src_rgb << 26 |
-          dst_rgb << 21 |
-          gen6_translate_pipe_blend(rt->rgb_func) << 18 |
-          src_a << 13 |
-          dst_a << 8 |
-          gen6_translate_pipe_blend(rt->alpha_func) << 5;
-}
-
-/**
- * Initialize blend->cso[index] from state->rt[index] for GEN6 to GEN7.5:
- * the clamp and write-disable bits in the payload, and the two variants of
- * the blend-enable dword.
- */
-static void
-blend_init_cso_gen6(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-   uint32_t dw1;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
-         GEN6_RT_DW1_PRE_BLEND_CLAMP |
-         GEN6_RT_DW1_POST_BLEND_CLAMP;
-
-   /* a channel missing from the colormask has its writes disabled */
-   if (!(rt->colormask & PIPE_MASK_A))
-      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_A;
-   if (!(rt->colormask & PIPE_MASK_R))
-      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_R;
-   if (!(rt->colormask & PIPE_MASK_G))
-      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_G;
-   if (!(rt->colormask & PIPE_MASK_B))
-      dw1 |= GEN6_RT_DW1_WRITE_DISABLE_B;
-
-   cso->payload[0] = 0;
-   cso->payload[1] = dw1;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-    *
-    *     "Color Buffer Blending and Logic Ops must not be enabled
-    *      simultaneously, or behavior is UNDEFINED."
-    *
-    * Since state->logicop_enable takes precedence over rt->blend_enable,
-    * no special care is needed.
-    */
-   if (!state->logicop_enable) {
-      cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
-      cso->dw_blend_dst_alpha_forced_one =
-         blend_get_rt_blend_enable_gen6(dev, rt, true);
-   } else {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-   }
-}
-
-/**
- * Initialize blend->cso[index] from state->rt[index] for GEN8: the clamp
- * and write-disable bits in the payload, and the two variants of the
- * blend-enable dword.
- *
- * Return true when either variant needs independent alpha blending.
- */
-static bool
-blend_init_cso_gen8(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-   bool indep = false, indep_forced_one = false;
-   uint32_t dw0 = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   /* a channel missing from the colormask has its writes disabled */
-   if (!(rt->colormask & PIPE_MASK_A))
-      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_A;
-   if (!(rt->colormask & PIPE_MASK_R))
-      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_R;
-   if (!(rt->colormask & PIPE_MASK_G))
-      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_G;
-   if (!(rt->colormask & PIPE_MASK_B))
-      dw0 |= GEN8_RT_DW0_WRITE_DISABLE_B;
-
-   cso->payload[0] = dw0;
-   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
-                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
-                     GEN8_RT_DW1_POST_BLEND_CLAMP;
-
-   /* logic ops take precedence over blending */
-   if (!state->logicop_enable) {
-      cso->dw_blend =
-         blend_get_rt_blend_enable_gen8(dev, rt, false, &indep);
-      cso->dw_blend_dst_alpha_forced_one =
-         blend_get_rt_blend_enable_gen8(dev, rt, true, &indep_forced_one);
-   } else {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-   }
-
-   return (indep || indep_forced_one);
-}
-
-/**
- * Return the logicop enable bit and function for GEN6 to GEN7.5, or 0 when
- * logic ops are disabled.
- */
-static uint32_t
-blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (state->logicop_enable) {
-      dw = GEN6_RT_DW1_LOGICOP_ENABLE |
-           gen6_translate_pipe_logicop(state->logicop_func) << 18;
-   }
-
-   return dw;
-}
-
-/**
- * Return the logicop enable bit and function for GEN8, or 0 when logic ops
- * are disabled.
- */
-static uint32_t
-blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->logicop_enable) {
-      dw = GEN8_RT_DW1_LOGICOP_ENABLE |
-           gen6_translate_pipe_logicop(state->logicop_func) << 27;
-   }
-
-   return dw;
-}
-
-/**
- * Return the alpha-to-coverage and alpha-to-one bits for GEN6 to GEN7.5.
- * The dither bit for alpha-to-coverage is only set on GEN7 and later.
- */
-static uint32_t
-blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   uint32_t bits = 0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (state->alpha_to_coverage) {
-      bits |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
-
-      if (ilo_dev_gen(dev) >= ILO_GEN(7))
-         bits |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
-    *
-    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
-    *      must be disabled."
-    */
-   if (!dual_blend && state->alpha_to_one)
-      bits |= GEN6_RT_DW1_ALPHA_TO_ONE;
-
-   return bits;
-}
-
-/**
- * Return the alpha-to-coverage and alpha-to-one bits for GEN8.  Alpha-to-one
- * is left disabled when dual source blending is in use.
- */
-static uint32_t
-blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   uint32_t bits = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->alpha_to_coverage) {
-      bits |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE;
-      bits |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
-   }
-
-   if (!dual_blend && state->alpha_to_one)
-      bits |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
-
-   return bits;
-}
-
-/**
- * Repack the blend factors found in a GEN8 render target dword (rt_dw0)
- * into the GEN8_PS_BLEND_DW1 layout.  Return 0 when rt_dw0 does not have
- * blending enabled.
- */
-static uint32_t
-blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-   uint32_t ps_blend;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE))
-      return 0;
-
-   src_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
-   dst_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
-   src_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
-   dst_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
-
-   ps_blend = GEN8_PS_BLEND_DW1_BLEND_ENABLE;
-   ps_blend |= GEN_SHIFT32(src_a, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR);
-   ps_blend |= GEN_SHIFT32(dst_a, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR);
-   ps_blend |= GEN_SHIFT32(src_rgb, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR);
-   ps_blend |= GEN_SHIFT32(dst_rgb, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);
-
-   /* only the factors are compared here, not the blend functions */
-   if (src_a != src_rgb || dst_a != dst_rgb)
-      ps_blend |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
-
-   return ps_blend;
-}
-
-/**
- * Initialize an ilo_blend_state from a pipe blend state.
- *
- * The per-render-target CSOs are initialized individually when
- * state->independent_blend_enable is set; otherwise the CSO of render
- * target 0 is copied to all the others.
- */
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
-                   const struct pipe_blend_state *state,
-                   struct ilo_blend_state *blend)
-{
-   const bool gen8_plus = (ilo_dev_gen(dev) >= ILO_GEN(8));
-   unsigned rt;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
-                        state->rt[0].blend_enable &&
-                        !state->logicop_enable);
-   blend->alpha_to_coverage = state->alpha_to_coverage;
-
-   if (!gen8_plus) {
-      blend->dw_alpha_mod =
-         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
-      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
-      blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;
-
-      /* 3DSTATE_PS_BLEND dwords are only computed for GEN8 */
-      blend->dw_ps_blend = 0;
-      blend->dw_ps_blend_dst_alpha_forced_one = 0;
-
-      blend_init_cso_gen6(dev, state, blend, 0);
-
-      for (rt = 1; rt < Elements(blend->cso); rt++) {
-         if (state->independent_blend_enable)
-            blend_init_cso_gen6(dev, state, blend, rt);
-         else
-            blend->cso[rt] = blend->cso[0];
-      }
-
-      return;
-   }
-
-   blend->dw_alpha_mod =
-      blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
-   blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
-   blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;
-
-   if (blend_init_cso_gen8(dev, state, blend, 0))
-      blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-   /* the PS blend dwords are derived from render target 0 only */
-   blend->dw_ps_blend =
-      blend_get_ps_blend_gen8(dev, blend->cso[0].dw_blend);
-   blend->dw_ps_blend_dst_alpha_forced_one =
-      blend_get_ps_blend_gen8(dev,
-            blend->cso[0].dw_blend_dst_alpha_forced_one);
-
-   for (rt = 1; rt < Elements(blend->cso); rt++) {
-      if (state->independent_blend_enable) {
-         if (blend_init_cso_gen8(dev, state, blend, rt))
-            blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-      } else {
-         blend->cso[rt] = blend->cso[0];
-      }
-   }
-}
-
-/**
- * Map a PIPE_FUNC_x test function (depth, stencil, or alpha) to the
- * corresponding GEN6_COMPAREFUNCTION_x value.  Unknown values assert and
- * map to GEN6_COMPAREFUNCTION_NEVER.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
-   int hw_func;
-
-   switch (func) {
-   case PIPE_FUNC_NEVER:
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   case PIPE_FUNC_LESS:
-      hw_func = GEN6_COMPAREFUNCTION_LESS;
-      break;
-   case PIPE_FUNC_EQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
-      break;
-   case PIPE_FUNC_LEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
-      break;
-   case PIPE_FUNC_GREATER:
-      hw_func = GEN6_COMPAREFUNCTION_GREATER;
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
-      break;
-   case PIPE_FUNC_GEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
-      break;
-   case PIPE_FUNC_ALWAYS:
-      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
-      break;
-   default:
-      assert(!"unknown depth/stencil/alpha test function");
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   }
-
-   return hw_func;
-}
-
-/**
- * Return the stencil test dword for GEN6 to GEN7.5, or 0 when the first
- * stencil state is disabled.  The second stencil state only contributes
- * when it is enabled itself.
- */
-static uint32_t
-dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
-                            const struct pipe_stencil_state *stencil0,
-                            const struct pipe_stencil_state *stencil1)
-{
-   bool write_enable;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!stencil0->enabled)
-      return 0;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
-    *
-    *     "If the Depth Buffer is either undefined or does not have a surface
-    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
-    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
-    *
-    *     "This field (Stencil Test Enable) cannot be enabled if
-    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
-    *
-    * TODO We do not check these yet.
-    */
-   dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
-        gen6_translate_dsa_func(stencil0->func) << 28 |
-        gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
-        gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
-        gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
-   write_enable = (stencil0->writemask != 0);
-
-   if (stencil1->enabled) {
-      dw |= GEN6_ZS_DW0_STENCIL1_ENABLE |
-            gen6_translate_dsa_func(stencil1->func) << 12 |
-            gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
-            gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
-            gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
-      write_enable = write_enable || (stencil1->writemask != 0);
-   }
-
-   /* a single write enable bit is shared by both stencil states */
-   if (write_enable)
-      dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
-
-   return dw;
-}
-
-/**
- * Return the stencil test dword for GEN8, or 0 when the first stencil state
- * is disabled.  The second stencil state only contributes when it is
- * enabled itself.
- */
-static uint32_t
-dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
-                            const struct pipe_stencil_state *stencil0,
-                            const struct pipe_stencil_state *stencil1)
-{
-   bool write_enable;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!stencil0->enabled)
-      return 0;
-
-   dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
-        gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
-        gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
-        gen6_translate_dsa_func(stencil0->func) << 8 |
-        GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
-   write_enable = (stencil0->writemask != 0);
-
-   if (stencil1->enabled) {
-      dw |= gen6_translate_dsa_func(stencil1->func) << 20 |
-            gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
-            gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
-            gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
-            GEN8_ZS_DW1_STENCIL1_ENABLE;
-      write_enable = write_enable || (stencil1->writemask != 0);
-   }
-
-   /* a single write enable bit is shared by both stencil states */
-   if (write_enable)
-      dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
-                          const struct pipe_depth_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
-    *
-    *     "Enabling the Depth Test function without defining a Depth Buffer is
-    *      UNDEFINED."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
-    *
-    *     "A Depth Buffer must be defined before enabling writes to it, or
-    *      operation is UNDEFINED."
-    *
-    * TODO We do not check these yet.
-    */
-   if (state->enabled) {
-      dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
-           gen6_translate_dsa_func(state->func) << 27;
-   } else {
-      dw = GEN6_COMPAREFUNCTION_ALWAYS << 27;
-   }
-
-   if (state->writemask)
-      dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
-                          const struct pipe_depth_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->enabled) {
-      dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
-           gen6_translate_dsa_func(state->func) << 5;
-   } else {
-      dw = GEN6_COMPAREFUNCTION_ALWAYS << 5;
-   }
-
-   if (state->writemask)
-      dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
-                          const struct pipe_alpha_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!state->enabled)
-      return 0;
-
-   /* this will be ORed to BLEND_STATE */
-   dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
-        gen6_translate_dsa_func(state->func) << 13;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
-                          const struct pipe_alpha_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!state->enabled)
-      return 0;
-
-   /* this will be ORed to BLEND_STATE */
-   dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
-        gen6_translate_dsa_func(state->func) << 24;
-
-   return dw;
-}
-
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
-                 const struct pipe_depth_stencil_alpha_state *state,
-                 struct ilo_dsa_state *dsa)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   STATIC_ASSERT(Elements(dsa->payload) >= 3);
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
-            &state->stencil[0], &state->stencil[1]);
-      const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
-
-      assert(!(dw_stencil & dw_depth));
-      dsa->payload[0] = dw_stencil | dw_depth;
-
-      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
-      dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
-         GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
-   } else {
-      dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
-            &state->stencil[0], &state->stencil[1]);
-      dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
-
-      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
-      dsa->dw_ps_blend_alpha = 0;
-   }
-
-   dsa->payload[1] = state->stencil[0].valuemask << 24 |
-                     state->stencil[0].writemask << 16 |
-                     state->stencil[1].valuemask << 8 |
-                     state->stencil[1].writemask;
-
-   dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
-}
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
-                    unsigned start_slot,
-                    unsigned num_states,
-                    const struct pipe_scissor_state *states,
-                    struct ilo_scissor_state *scissor)
-{
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   for (i = 0; i < num_states; i++) {
-      uint16_t min_x, min_y, max_x, max_y;
-
-      /* both max and min are inclusive in SCISSOR_RECT */
-      if (states[i].minx < states[i].maxx &&
-          states[i].miny < states[i].maxy) {
-         min_x = states[i].minx;
-         min_y = states[i].miny;
-         max_x = states[i].maxx - 1;
-         max_y = states[i].maxy - 1;
-      }
-      else {
-         /* we have to make min greater than max */
-         min_x = 1;
-         min_y = 1;
-         max_x = 0;
-         max_y = 0;
-      }
-
-      scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
-      scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
-   }
-
-   if (!start_slot && num_states)
-      scissor->scissor0 = states[0];
-}
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
-                         struct ilo_scissor_state *scissor)
-{
-   unsigned i;
-
-   for (i = 0; i < Elements(scissor->payload); i += 2) {
-      scissor->payload[i + 0] = 1 << 16 | 1;
-      scissor->payload[i + 1] = 0;
-   }
-}
-
-static void
-fb_set_blend_caps(const struct ilo_dev *dev,
-                  enum pipe_format format,
-                  struct ilo_fb_blend_caps *caps)
-{
-   const struct util_format_description *desc =
-      util_format_description(format);
-   const int ch = util_format_get_first_non_void_channel(format);
-
-   memset(caps, 0, sizeof(*caps));
-
-   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
-      return;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-    *
-    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
-    *      variants), otherwise Logic Ops must be DISABLED."
-    *
-    * According to the classic driver, this is lifted on Gen8+.
-    */
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      caps->can_logicop = true;
-   } else {
-      caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized &&
-            desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
-            desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
-   }
-
-   /* no blending for pure integer formats */
-   caps->can_blend = !util_format_is_pure_integer(format);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
-    *
-    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
-    *      alpha value."
-    */
-   caps->can_alpha_test = !util_format_is_pure_integer(format);
-
-   caps->dst_alpha_forced_one =
-      (ilo_format_translate_render(dev, format) !=
-       ilo_format_translate_color(dev, format));
-
-   /* sanity check */
-   if (caps->dst_alpha_forced_one) {
-      enum pipe_format render_format;
-
-      switch (format) {
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
-         render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
-         break;
-      default:
-         render_format = PIPE_FORMAT_NONE;
-         break;
-      }
-
-      assert(ilo_format_translate_render(dev, format) ==
-             ilo_format_translate_color(dev, render_format));
-   }
-}
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
-               const struct pipe_framebuffer_state *state,
-               struct ilo_fb_state *fb)
-{
-   const struct pipe_surface *first_surf = NULL;
-   int i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   util_copy_framebuffer_state(&fb->state, state);
-
-   ilo_gpe_init_view_surface_null(dev,
-         (state->width) ? state->width : 1,
-         (state->height) ? state->height : 1,
-         1, 0, &fb->null_rt);
-
-   for (i = 0; i < state->nr_cbufs; i++) {
-      if (state->cbufs[i]) {
-         fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
-
-         if (!first_surf)
-            first_surf = state->cbufs[i];
-      } else {
-         fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
-      }
-   }
-
-   if (!first_surf && state->zsbuf)
-      first_surf = state->zsbuf;
-
-   fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
-   if (!fb->num_samples)
-      fb->num_samples = 1;
-
-   /*
-    * The PRMs list several restrictions when the framebuffer has more than
-    * one surface.  It seems they are actually lifted on GEN6+.
-    */
-}
diff --git a/src/gallium/drivers/ilo/ilo_state_3d_top.c b/src/gallium/drivers/ilo/ilo_state_3d_top.c
deleted file mode 100644 (file)
index f022972..0000000
+++ /dev/null
@@ -1,1713 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "core/ilo_format.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-#include "util/u_resource.h"
-
-#include "ilo_context.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
-#include "ilo_state.h"
-#include "ilo_state_3d.h"
-
-static void
-ve_init_cso(const struct ilo_dev *dev,
-            const struct pipe_vertex_element *state,
-            unsigned vb_index,
-            struct ilo_ve_cso *cso)
-{
-   int comp[4] = {
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-   };
-   int format;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   switch (util_format_get_nr_components(state->src_format)) {
-   case 1: comp[1] = GEN6_VFCOMP_STORE_0;
-   case 2: comp[2] = GEN6_VFCOMP_STORE_0;
-   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
-                     GEN6_VFCOMP_STORE_1_INT :
-                     GEN6_VFCOMP_STORE_1_FP;
-   }
-
-   format = ilo_format_translate_vertex(dev, state->src_format);
-
-   STATIC_ASSERT(Elements(cso->payload) >= 2);
-   cso->payload[0] =
-      vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
-      GEN6_VE_DW0_VALID |
-      format << GEN6_VE_DW0_FORMAT__SHIFT |
-      state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
-
-   cso->payload[1] =
-         comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
-         comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
-         comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
-         comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
-                unsigned num_states,
-                const struct pipe_vertex_element *states,
-                struct ilo_ve_state *ve)
-{
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   ve->count = num_states;
-   ve->vb_count = 0;
-
-   for (i = 0; i < num_states; i++) {
-      const unsigned pipe_idx = states[i].vertex_buffer_index;
-      const unsigned instance_divisor = states[i].instance_divisor;
-      unsigned hw_idx;
-
-      /*
-       * map the pipe vb to the hardware vb, which has a fixed instance
-       * divisor
-       */
-      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
-         if (ve->vb_mapping[hw_idx] == pipe_idx &&
-             ve->instance_divisors[hw_idx] == instance_divisor)
-            break;
-      }
-
-      /* create one if there is no matching hardware vb */
-      if (hw_idx >= ve->vb_count) {
-         hw_idx = ve->vb_count++;
-
-         ve->vb_mapping[hw_idx] = pipe_idx;
-         ve->instance_divisors[hw_idx] = instance_divisor;
-      }
-
-      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
-   }
-}
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
-                        struct ilo_ve_cso *cso)
-{
-   int format;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
-    *
-    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
-    *        valid VERTEX_ELEMENT structure.
-    *
-    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
-    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
-    *
-    *      - The Source Element Format must be set to the UINT format.
-    *
-    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
-    *        primitives.  Software may elect to convert QUADLIST primitives
-    *        to some set of corresponding edge-flag-supported primitive
-    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
-    */
-   cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
-
-   /*
-    * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
-    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
-    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
-    *
-    * Since all the hardware cares about is whether the flags are zero or not,
-    * we can treat them as the corresponding _UINT formats.
-    */
-   format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
-   cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
-
-   switch (format) {
-   case GEN6_FORMAT_R32_FLOAT:
-      format = GEN6_FORMAT_R32_UINT;
-      break;
-   case GEN6_FORMAT_R8_USCALED:
-      format = GEN6_FORMAT_R8_UINT;
-      break;
-   default:
-      break;
-   }
-
-   cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
-
-   cso->payload[1] =
-         GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
-                          int comp0, int comp1, int comp2, int comp3,
-                          struct ilo_ve_cso *cso)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   STATIC_ASSERT(Elements(cso->payload) >= 2);
-
-   assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
-          comp1 != GEN6_VFCOMP_STORE_SRC &&
-          comp2 != GEN6_VFCOMP_STORE_SRC &&
-          comp3 != GEN6_VFCOMP_STORE_SRC);
-
-   cso->payload[0] = GEN6_VE_DW0_VALID;
-   cso->payload[1] =
-         comp0 << GEN6_VE_DW1_COMP0__SHIFT |
-         comp1 << GEN6_VE_DW1_COMP1__SHIFT |
-         comp2 << GEN6_VE_DW1_COMP2__SHIFT |
-         comp3 << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *vs,
-                    struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
-   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
-    *
-    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
-    *      128-bit vertex elements to be passed into the payload for each
-    *      vertex."
-    *
-    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
-    *      data to be read and passed to the thread."
-    */
-   vue_read_len = (vue_read_len + 1) / 2;
-   if (!vue_read_len)
-      vue_read_len = 1;
-
-   max_threads = dev->thread_count;
-   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
-      max_threads *= 2;
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
-         vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
-         0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
-
-   dw5 = GEN6_VS_DW5_STATISTICS |
-         GEN6_VS_DW5_VS_ENABLE;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
-      dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
-   else
-      dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 3);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-}
-
-static void
-gs_init_cso_gen6(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *gs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, max_threads;
-   uint32_t dw2, dw4, dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
-      start_grf = ilo_shader_get_kernel_param(gs,
-            ILO_KERNEL_URB_DATA_START_REG);
-
-      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
-   }
-   else {
-      start_grf = ilo_shader_get_kernel_param(gs,
-            ILO_KERNEL_VS_GEN6_SO_START_REG);
-
-      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
-    *
-    *     "Specifies the amount of URB data read and passed in the thread
-    *      payload for each Vertex URB entry, in 256-bit register increments.
-    *
-    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
-    *      0 indicating no Vertex URB data to be read and passed to the
-    *      thread."
-    */
-   vue_read_len = (vue_read_len + 1) / 2;
-   if (!vue_read_len)
-      vue_read_len = 1;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
-    *
-    *     "Maximum Number of Threads valid range is [0,27] when Rendering
-    *      Enabled bit is set."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
-    *
-    *     "Programming Note: If the GS stage is enabled, software must always
-    *      allocate at least one GS URB Entry. This is true even if the GS
-    *      thread never needs to output vertices to the pipeline, e.g., when
-    *      only performing stream output. This is an artifact of the need to
-    *      pass the GS thread an initial destination URB handle."
-    *
-    * As such, we always enable rendering, and limit the number of threads.
-    */
-   if (dev->gt == 2) {
-      /* maximum is 60, but limited to 28 */
-      max_threads = 28;
-   }
-   else {
-      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
-      max_threads = 21;
-   }
-
-   dw2 = GEN6_THREADDISP_SPF;
-
-   dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
-         0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
-         start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
-         GEN6_GS_DW5_STATISTICS |
-         GEN6_GS_DW5_SO_STATISTICS |
-         GEN6_GS_DW5_RENDER_ENABLE;
-
-   /*
-    * we cannot make use of GEN6_GS_REORDER because it will reorder
-    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
-    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
-    * (2N+2, 2N+1, 2N+3)).
-    */
-   dw6 = GEN6_GS_DW6_GS_ENABLE;
-
-   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
-      dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
-
-   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
-      const uint32_t svbi_post_inc =
-         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
-
-      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
-      if (svbi_post_inc) {
-         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
-                svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
-      }
-   }
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = dw6;
-}
-
-static void
-gs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *gs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
-   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /* in pairs */
-   vue_read_len = (vue_read_len + 1) / 2;
-
-   switch (ilo_dev_gen(dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (dev->gt >= 2) ? 256 : 70;
-      break;
-   case ILO_GEN(7):
-      max_threads = (dev->gt == 2) ? 128 : 36;
-      break;
-   default:
-      max_threads = 1;
-      break;
-   }
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
-         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
-         0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
-         start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
-         GEN7_GS_DW5_STATISTICS |
-         GEN7_GS_DW5_GS_ENABLE;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 3);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-}
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *gs,
-                    struct ilo_shader_cso *cso)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(7))
-      gs_init_cso_gen7(dev, gs, cso);
-   else
-      gs_init_cso_gen6(dev, gs, cso);
-}
-
-static void
-view_init_null_gen6(const struct ilo_dev *dev,
-                    unsigned width, unsigned height,
-                    unsigned depth, unsigned level,
-                    struct ilo_view_surface *surf)
-{
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   assert(width >= 1 && height >= 1 && depth >= 1);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
-    *
-    *     "A null surface will be used in instances where an actual surface is
-    *      not bound. When a write message is generated to a null surface, no
-    *      actual surface is written to. When a read message (including any
-    *      sampling engine message) is generated to a null surface, the result
-    *      is all zeros. Note that a null surface type is allowed to be used
-    *      with all messages, even if it is not specificially indicated as
-    *      supported. All of the remaining fields in surface state are ignored
-    *      for null surfaces, with the following exceptions:
-    *
-    *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
-    *          depth buffer's corresponding state for all render target
-    *          surfaces, including null.
-    *        * Surface Format must be R8G8B8A8_UNORM."
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
-    *
-    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
-    *      true"
-    */
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-
-   dw[1] = 0;
-
-   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           (width  - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
-           level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
-   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           GEN6_TILING_X;
-
-   dw[4] = 0;
-   dw[5] = 0;
-}
-
-static void
-view_init_for_buffer_gen6(const struct ilo_dev *dev,
-                          const struct ilo_buffer *buf,
-                          unsigned offset, unsigned size,
-                          unsigned struct_size,
-                          enum pipe_format elem_format,
-                          bool is_rt, bool render_cache_rw,
-                          struct ilo_view_surface *surf)
-{
-   const int elem_size = util_format_get_blocksize(elem_format);
-   int width, height, depth, pitch;
-   int surface_format, num_entries;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   /*
-    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
-    * structure in a buffer.
-    */
-
-   surface_format = ilo_format_translate_color(dev, elem_format);
-
-   num_entries = size / struct_size;
-   /* see if there is enough space to fit another element */
-   if (size % struct_size >= elem_size)
-      num_entries++;
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
-    *
-    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
-    *      Address) specifies the base address of first element of the
-    *      surface. The surface is interpreted as a simple array of that
-    *      single element type. The address must be naturally-aligned to the
-    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
-    *      must be 16-byte aligned).
-    *
-    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
-    *      the base address of the first element of the surface, computed in
-    *      software by adding the surface base address to the byte offset of
-    *      the element in the buffer."
-    */
-   if (is_rt)
-      assert(offset % elem_size == 0);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
-    *
-    *     "For buffer surfaces, the number of entries in the buffer ranges
-    *      from 1 to 2^27."
-    */
-   assert(num_entries >= 1 && num_entries <= 1 << 27);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-    *
-    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
-    *      indicates the size of the structure."
-    */
-   pitch = struct_size;
-
-   pitch--;
-   num_entries--;
-   /* bits [6:0] */
-   width  = (num_entries & 0x0000007f);
-   /* bits [19:7] */
-   height = (num_entries & 0x000fff80) >> 7;
-   /* bits [26:20] */
-   depth  = (num_entries & 0x07f00000) >> 20;
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-   if (render_cache_rw)
-      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
-   dw[1] = offset;
-
-   dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
-
-   dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
-
-   dw[4] = 0;
-   dw[5] = 0;
-}
-
-static void
-view_init_for_texture_gen6(const struct ilo_dev *dev,
-                           const struct ilo_texture *tex,
-                           enum pipe_format format,
-                           unsigned first_level,
-                           unsigned num_levels,
-                           unsigned first_layer,
-                           unsigned num_layers,
-                           bool is_rt,
-                           struct ilo_view_surface *surf)
-{
-   int surface_type, surface_format;
-   int width, height, depth, pitch, lod;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
-   assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
-   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
-      format = PIPE_FORMAT_Z32_FLOAT;
-
-   if (is_rt)
-      surface_format = ilo_format_translate_render(dev, format);
-   else
-      surface_format = ilo_format_translate_texture(dev, format);
-   assert(surface_format >= 0);
-
-   width = tex->image.width0;
-   height = tex->image.height0;
-   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
-      tex->base.depth0 : num_layers;
-   pitch = tex->image.bo_stride;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-       *
-       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
-       *      range of this field (Depth) is [0,84], indicating the number of
-       *      cube array elements (equal to the number of underlying 2D array
-       *      elements divided by 6). For other surfaces, this field must be
-       *      zero."
-       *
-       * When is_rt is true, we treat the texture as a 2D one to avoid the
-       * restriction.
-       */
-      if (is_rt) {
-         surface_type = GEN6_SURFTYPE_2D;
-      }
-      else {
-         assert(num_layers % 6 == 0);
-         depth = num_layers / 6;
-      }
-   }
-
-   /* sanity check the size */
-   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
-   switch (surface_type) {
-   case GEN6_SURFTYPE_1D:
-      assert(width <= 8192 && height == 1 && depth <= 512);
-      assert(first_layer < 512 && num_layers <= 512);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(width <= 8192 && height <= 8192 && depth <= 512);
-      assert(first_layer < 512 && num_layers <= 512);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(width <= 2048 && height <= 2048 && depth <= 2048);
-      assert(first_layer < 2048 && num_layers <= 512);
-      if (!is_rt)
-         assert(first_layer == 0);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(width <= 8192 && height <= 8192 && depth <= 85);
-      assert(width == height);
-      assert(first_layer < 512 && num_layers <= 512);
-      if (is_rt)
-         assert(first_layer == 0);
-      break;
-   default:
-      assert(!"unexpected surface type");
-      break;
-   }
-
-   /* non-full array spacing is supported only on GEN7+ */
-   assert(tex->image.walk != ILO_IMAGE_WALK_LOD);
-   /* non-interleaved samples are supported only on GEN7+ */
-   if (tex->base.nr_samples > 1)
-      assert(tex->image.interleaved_samples);
-
-   if (is_rt) {
-      assert(num_levels == 1);
-      lod = first_level;
-   }
-   else {
-      lod = num_levels - 1;
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
-    *
-    *     "Linear render target surface base addresses must be element-size
-    *      aligned, for non-YUV surface formats, or a multiple of 2
-    *      element-sizes for YUV surface formats. Other linear surfaces have
-    *      no alignment requirements (byte alignment is sufficient.)"
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-    *
-    *     "For linear render target surfaces, the pitch must be a multiple
-    *      of the element size for non-YUV surface formats. Pitch must be a
-    *      multiple of 2 * element size for YUV surface formats."
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
-    *
-    *     "For linear surfaces, this field (X Offset) must be zero"
-    */
-   if (tex->image.tiling == GEN6_TILING_NONE) {
-      if (is_rt) {
-         const int elem_size = util_format_get_blocksize(format);
-         assert(pitch % elem_size == 0);
-      }
-   }
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
-           GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
-      dw[0] |= 1 << 9 |
-               GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-   }
-
-   if (is_rt)
-      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
-   dw[1] = 0;
-
-   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
-           lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
-   assert(tex->image.tiling != GEN8_TILING_W);
-   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
-           tex->image.tiling;
-
-   dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
-           first_layer << 17 |
-           (num_layers - 1) << 8 |
-           ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
-                                         GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
-
-   dw[5] = 0;
-
-   assert(tex->image.align_j == 2 || tex->image.align_j == 4);
-   if (tex->image.align_j == 4)
-      dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
-}
-
-/**
- * Initialize \p surf as a null surface for Gen7 and Gen8.  Only the surface
- * type, a placeholder format, the tiling, the extents and the LOD are
- * programmed; every other dword that is written is cleared.
- */
-static void
-view_init_null_gen7(const struct ilo_dev *dev,
-                    unsigned width, unsigned height,
-                    unsigned depth, unsigned level,
-                    struct ilo_view_surface *surf)
-{
-   uint32_t *payload;
-   uint32_t type_and_format, tiling;
-   int i;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   /* each extent is programmed as (value - 1) below */
-   assert(width >= 1 && height >= 1 && depth >= 1);
-
-   /*
-    * The Ivy Bridge PRM, volume 4 part 1, page 62, describes null surfaces:
-    *
-    *     "A null surface is used in instances where an actual surface is not
-    *      bound. When a write message is generated to a null surface, no
-    *      actual surface is written to. When a read message (including any
-    *      sampling engine message) is generated to a null surface, the result
-    *      is all zeros.  Note that a null surface type is allowed to be used
-    *      with all messages, even if it is not specifically indicated as
-    *      supported. All of the remaining fields in surface state are ignored
-    *      for null surfaces, with the following exceptions:
-    *
-    *      * Width, Height, Depth, LOD, and Render Target View Extent fields
-    *        must match the depth buffer's corresponding state for all render
-    *        target surfaces, including null.
-    *      * All sampling engine and data port messages support null surfaces
-    *        with the above behavior, even if not mentioned as specifically
-    *        supported, except for the following:
-    *        * Data Port Media Block Read/Write messages.
-    *      * The Surface Type of a surface used as a render target (accessed
-    *        via the Data Port's Render Target Write message) must be the same
-    *        as the Surface Type of all other render targets and of the depth
-    *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
-    *        buffer or render targets are SURFTYPE_NULL."
-    *
-    * and page 65 of the same volume requires a tiled layout:
-    *
-    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
-    *      true"
-    */
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   payload = surf->payload;
-
-   type_and_format =
-      GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
-      GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
-   /* Gen8 has its own shift for the tiling field of DW0 */
-   tiling = (ilo_dev_gen(dev) >= ILO_GEN(8)) ?
-      GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT :
-      GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
-
-   payload[0] = type_and_format | tiling;
-   payload[1] = 0;
-   payload[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
-                GEN_SHIFT32(width  - 1, GEN7_SURFACE_DW2_WIDTH);
-   payload[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
-   payload[4] = 0;
-   payload[5] = level;
-   payload[6] = 0;
-   payload[7] = 0;
-
-   /* dwords 8..12 are only written on Gen8 */
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      for (i = 8; i < 13; i++)
-         payload[i] = 0;
-   }
-}
-
-/**
- * Initialize \p surf as a Gen7/Gen8 buffer surface.
- *
- * \param buf             not referenced by this function
- * \param offset          stored in the address dword (DW1 on Gen7, DW8 on
- *                        Gen8)
- * \param size            size of the view in bytes; together with
- *                        \p struct_size it determines the number of entries
- * \param struct_size     programmed as the surface pitch; a value above 1
- *                        combined with PIPE_FORMAT_NONE selects a structured
- *                        buffer
- * \param elem_format     PIPE_FORMAT_NONE selects GEN6_FORMAT_RAW; any other
- *                        format makes the surface typed
- * \param is_rt           only enables the offset alignment assertion
- * \param render_cache_rw sets GEN7_SURFACE_DW0_RENDER_CACHE_RW in DW0
- * \param surf            destination; only surf->payload is written here
- */
-static void
-view_init_for_buffer_gen7(const struct ilo_dev *dev,
-                          const struct ilo_buffer *buf,
-                          unsigned offset, unsigned size,
-                          unsigned struct_size,
-                          enum pipe_format elem_format,
-                          bool is_rt, bool render_cache_rw,
-                          struct ilo_view_surface *surf)
-{
-   const bool typed = (elem_format != PIPE_FORMAT_NONE);
-   const bool structured = (!typed && struct_size > 1);
-   const int elem_size = (typed) ?
-      util_format_get_blocksize(elem_format) : 1;
-   int width, height, depth, pitch;
-   int surface_type, surface_format, num_entries;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
-
-   surface_format = (typed) ?
-      ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
-
-   num_entries = size / struct_size;
-   /* see if there is enough space to fit another element */
-   if (size % struct_size >= elem_size && !structured)
-      num_entries++;
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
-    *
-    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
-    *      Address) specifies the base address of first element of the
-    *      surface. The surface is interpreted as a simple array of that
-    *      single element type. The address must be naturally-aligned to the
-    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
-    *      must be 16-byte aligned)
-    *
-    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
-    *      the base address of the first element of the surface, computed in
-    *      software by adding the surface base address to the byte offset of
-    *      the element in the buffer."
-    */
-   if (is_rt)
-      assert(offset % elem_size == 0);
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
-    *
-    *     "For typed buffer and structured buffer surfaces, the number of
-    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
-    *      surfaces, the number of entries in the buffer is the number of
-    *      bytes which can range from 1 to 2^30."
-    */
-   assert(num_entries >= 1 &&
-          num_entries <= 1 << ((typed || structured) ? 27 : 30));
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
-    *
-    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
-    *      11 if the Surface Format is RAW (the size of the buffer must be a
-    *      multiple of 4 bytes)."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-    *
-    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
-    *      field (Surface Pitch) indicates the size of the structure."
-    *
-    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
-    *      must be a multiple of 4 bytes."
-    */
-   if (structured)
-      assert(struct_size % 4 == 0);
-   else if (!typed)
-      assert(num_entries % 4 == 0);
-
-   pitch = struct_size;
-
-   /* both the pitch and the entry count are programmed as (value - 1) */
-   pitch--;
-   num_entries--;
-
-   /* the entry count is split across the Width, Height and Depth fields */
-   /* bits [6:0] */
-   width  = (num_entries & 0x0000007f);
-   /* bits [20:7] */
-   height = (num_entries & 0x001fff80) >> 7;
-   /* bits [30:21] */
-   depth  = (num_entries & 0x7fe00000) >> 21;
-   /* limit to [26:21] */
-   if (typed || structured)
-      depth &= 0x3f;
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-   if (render_cache_rw)
-      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      /*
-       * The offset moves to DW8 on Gen8.  DW1 must still be written, or
-       * whatever the payload held before would be emitted.
-       */
-      dw[1] = 0;
-      dw[8] = offset;
-      memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
-   } else {
-      dw[1] = offset;
-   }
-
-   dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
-           GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
-
-   dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
-           pitch;
-
-   dw[4] = 0;
-   dw[5] = 0;
-
-   dw[6] = 0;
-   dw[7] = 0;
-
-   /* Gen7.5 and later: program the channel selects as R, G, B, A */
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
-      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
-               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
-               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
-               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
-   }
-}
-/**
- * Fill surf->payload with a Gen7/Gen8 surface state describing a view of
- * \p tex.
- *
- * \param format      view format; translated with the render or the texture
- *                    format helper depending on \p is_rt
- * \param first_level written to the MIN_LOD field of DW5, and also used as
- *                    the LOD value when \p is_rt is set
- * \param num_levels  must be 1 for render targets; otherwise num_levels - 1
- *                    is written to the low bits of DW5
- * \param first_layer written to DW4 at bit 18
- * \param num_layers  num_layers - 1 is written to DW4 at bit 7; it is also
- *                    the depth of non-3D textures
- * \param is_rt       true when the view is used as a render target
- * \param surf        destination; only surf->payload is written here
- */
-static void
-view_init_for_texture_gen7(const struct ilo_dev *dev,
-                           const struct ilo_texture *tex,
-                           enum pipe_format format,
-                           unsigned first_level,
-                           unsigned num_levels,
-                           unsigned first_layer,
-                           unsigned num_layers,
-                           bool is_rt,
-                           struct ilo_view_surface *surf)
-{
-   int surface_type, surface_format;
-   int width, height, depth, pitch, lod;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
-   assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
-   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
-      format = PIPE_FORMAT_Z32_FLOAT; /* NOTE(review): presumably stencil is stored apart, leaving only depth to view; confirm */
-
-   if (is_rt)
-      surface_format = ilo_format_translate_render(dev, format);
-   else
-      surface_format = ilo_format_translate_texture(dev, format);
-   assert(surface_format >= 0);
-
-   width = tex->image.width0;
-   height = tex->image.height0;
-   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
-      tex->base.depth0 : num_layers; /* non-3D targets use the layer count as depth */
-   pitch = tex->image.bo_stride;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-       *
-       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
-       *      this field is [0,340], indicating the number of cube array
-       *      elements (equal to the number of underlying 2D array elements
-       *      divided by 6). For other surfaces, this field must be zero."
-       *
-       * When is_rt is true, we treat the texture as a 2D one to avoid the
-       * restriction.
-       */
-      if (is_rt) {
-         surface_type = GEN6_SURFTYPE_2D;
-      }
-      else {
-         assert(num_layers % 6 == 0);
-         depth = num_layers / 6;
-      }
-   }
-
-   /* sanity check the size */
-   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
-   assert(first_layer < 2048 && num_layers <= 2048);
-   switch (surface_type) {
-   case GEN6_SURFTYPE_1D:
-      assert(width <= 16384 && height == 1 && depth <= 2048);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(width <= 16384 && height <= 16384 && depth <= 2048);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(width <= 2048 && height <= 2048 && depth <= 2048);
-      if (!is_rt)
-         assert(first_layer == 0);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(width <= 16384 && height <= 16384 && depth <= 86);
-      assert(width == height);
-      if (is_rt)
-         assert(first_layer == 0);
-      break;
-   default:
-      assert(!"unexpected surface type");
-      break;
-   }
-
-   if (is_rt) {
-      assert(num_levels == 1);
-      lod = first_level; /* a render target addresses exactly one level */
-   }
-   else {
-      lod = num_levels - 1; /* otherwise the field holds the level count minus one */
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
-    *
-    *     "The Base Address for linear render target surfaces and surfaces
-    *      accessed with the typed surface read/write data port messages must
-    *      be element-size aligned, for non-YUV surface formats, or a multiple
-    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
-    *      have no alignment requirements (byte alignment is sufficient)."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-    *
-    *     "For linear render target surfaces and surfaces accessed with the
-    *      typed data port messages, the pitch must be a multiple of the
-    *      element size for non-YUV surface formats. Pitch must be a multiple
-    *      of 2 * element size for YUV surface formats. For linear surfaces
-    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
-    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
-    *      of bytes."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
-    *
-    *     "For linear surfaces, this field (X Offset) must be zero."
-    */
-   if (tex->image.tiling == GEN6_TILING_NONE) {
-      if (is_rt) {
-         const int elem_size = util_format_get_blocksize(format);
-         assert(pitch % elem_size == 0);
-      }
-   }
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
-    *
-    *     "If this field (Surface Array) is enabled, the Surface Type must be
-    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
-    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
-    *      SURFTYPE_CUBE, the Depth field must be set to zero."
-    *
-    * For non-3D sampler surfaces, resinfo (the sampler message) always
-    * returns zero for the number of layers when this field is not set.
-    */
-   if (surface_type != GEN6_SURFTYPE_3D) {
-      if (util_resource_is_array_texture(&tex->base))
-         dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
-      else
-         assert(depth == 1);
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* Gen8: wider alignment choices and a different tiling shift */
-      switch (tex->image.align_j) {
-      case 4:
-         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-         break;
-      case 8:
-         dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
-         break;
-      case 16:
-         dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
-         break;
-      default:
-         assert(!"unsupported valign");
-         break;
-      }
-
-      switch (tex->image.align_i) {
-      case 4:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
-         break;
-      case 8:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
-         break;
-      case 16:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
-         break;
-      default:
-         assert(!"unsupported halign");
-         break;
-      }
-
-      dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
-   } else { /* Gen7/7.5: alignment is a flag each, plus the array spacing mode */
-      assert(tex->image.align_i == 4 || tex->image.align_i == 8);
-      assert(tex->image.align_j == 2 || tex->image.align_j == 4);
-
-      if (tex->image.align_j == 4)
-         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-
-      if (tex->image.align_i == 8)
-         dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
-
-      assert(tex->image.tiling != GEN8_TILING_W);
-      dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
-
-      if (tex->image.walk == ILO_IMAGE_WALK_LOD)
-         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
-      else
-         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
-   }
-
-   if (is_rt)
-      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
-      dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      assert(tex->image.layer_height % 4 == 0);
-      dw[1] = tex->image.layer_height / 4; /* stored divided by 4, hence the assertion above */
-   } else {
-      dw[1] = 0;
-   }
-
-   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
-           GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
-   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
-           (pitch - 1);
-
-   dw[4] = first_layer << 18 |
-           (num_layers - 1) << 7;
-
-   /*
-    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
-    * means the samples are interleaved.  The layouts are the same when the
-    * number of samples is 1.
-    */
-   if (tex->image.interleaved_samples && tex->base.nr_samples > 1) {
-      assert(!is_rt);
-      dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
-   }
-   else {
-      dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
-   }
-
-   switch (tex->base.nr_samples) {
-   case 0:
-   case 1:
-   default: /* any count not listed below is programmed as single-sampled */
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
-      break;
-   case 2:
-      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
-      break;
-   case 4:
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
-      break;
-   case 8:
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
-      break;
-   case 16:
-      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
-      break;
-   }
-
-   dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
-           lod;
-
-   dw[6] = 0;
-   dw[7] = 0;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { /* channel selects are only written on Gen7.5+ */
-      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
-               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
-               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
-               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      memset(&dw[8], 0, sizeof(*dw) * (13 - 8)); /* dwords 8..12 are only written on Gen8 */
-}
-
-/**
- * Initialize \p surf as a null surface of the given size and level, using
- * the initializer that matches the device generation.  The resulting
- * surface has no bo and is never a scanout.
- */
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
-                               unsigned width, unsigned height,
-                               unsigned depth, unsigned level,
-                               struct ilo_view_surface *surf)
-{
-   /* anything older than Gen7 takes the Gen6 path */
-   if (ilo_dev_gen(dev) < ILO_GEN(7))
-      view_init_null_gen6(dev, width, height, depth, level, surf);
-   else
-      view_init_null_gen7(dev, width, height, depth, level, surf);
-
-   surf->scanout = false;
-   surf->bo = NULL;
-}
-
-/**
- * Initialize \p surf as a view of \p buf, using the initializer that matches
- * the device generation.  The bo pointer of \p buf is copied without taking
- * a reference, and the surface is never marked as a scanout.
- */
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
-                                     const struct ilo_buffer *buf,
-                                     unsigned offset, unsigned size,
-                                     unsigned struct_size,
-                                     enum pipe_format elem_format,
-                                     bool is_rt, bool render_cache_rw,
-                                     struct ilo_view_surface *surf)
-{
-   /* anything older than Gen7 takes the Gen6 path */
-   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
-      view_init_for_buffer_gen6(dev, buf, offset, size,
-            struct_size, elem_format, is_rt, render_cache_rw, surf);
-   } else {
-      view_init_for_buffer_gen7(dev, buf, offset, size,
-            struct_size, elem_format, is_rt, render_cache_rw, surf);
-   }
-
-   surf->scanout = false;
-
-   /* borrowed pointer: the reference count is left alone */
-   surf->bo = buf->bo;
-}
-
-/**
- * Initialize \p surf as a view of \p tex, using the initializer that matches
- * the device generation.  The bo pointer of the texture image is copied
- * without taking a reference.  The surface is flagged as a scanout when the
- * texture is bound for scanout, or when it is an imported render target.
- */
-void
-ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
-                                      const struct ilo_texture *tex,
-                                      enum pipe_format format,
-                                      unsigned first_level,
-                                      unsigned num_levels,
-                                      unsigned first_layer,
-                                      unsigned num_layers,
-                                      bool is_rt,
-                                      struct ilo_view_surface *surf)
-{
-   /* anything older than Gen7 takes the Gen6 path */
-   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
-      view_init_for_texture_gen6(dev, tex, format,
-            first_level, num_levels, first_layer, num_layers,
-            is_rt, surf);
-   } else {
-      view_init_for_texture_gen7(dev, tex, format,
-            first_level, num_levels, first_layer, num_layers,
-            is_rt, surf);
-   }
-
-   /* borrowed pointer: the reference count is left alone */
-   surf->bo = tex->image.bo;
-
-   if (tex->base.bind & PIPE_BIND_SCANOUT) {
-      surf->scanout = true;
-   } else if (tex->imported && (tex->base.bind & PIPE_BIND_RENDER_TARGET)) {
-      /* assume imported RTs are scanouts */
-      surf->scanout = true;
-   } else {
-      surf->scanout = false;
-   }
-}
-
-/**
- * Pack a border color into the 12-dword Gen6 border color state.
- *
- * The same color is stored in every representation written below: UNORM8
- * (DW0), IEEE float (DW1-4), half float (DW5-6), UNORM16 (DW7-8), SNORM16
- * (DW9-10) and SNORM8 (DW11).
- *
- * \param color      border color; only the float members are read
- * \param dw         destination, at least 12 dwords
- * \param num_dwords capacity of \p dw; asserted to be at least 12
- */
-static void
-sampler_init_border_color_gen6(const struct ilo_dev *dev,
-                               const union pipe_color_union *color,
-                               uint32_t *dw, int num_dwords)
-{
-   float rgba[4] = {
-      color->f[0], color->f[1], color->f[2], color->f[3],
-   };
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   assert(num_dwords >= 12);
-
-   /*
-    * This state is not documented in the Sandy Bridge PRM, but in the
-    * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
-    */
-
-   /*
-    * Every packed channel below is narrowed with an unsigned cast and then
-    * widened to uint32_t before it is shifted and ORed.  Narrowing to a
-    * signed type instead would sign-extend a negative channel on promotion
-    * and overwrite the channels packed above it; shifting the promoted int
-    * could also overflow.
-    */
-
-   /* IEEE_FP */
-   dw[1] = fui(rgba[0]);
-   dw[2] = fui(rgba[1]);
-   dw[3] = fui(rgba[2]);
-   dw[4] = fui(rgba[3]);
-
-   /* FLOAT_16 */
-   dw[5] = (uint32_t) util_float_to_half(rgba[0]) |
-           (uint32_t) util_float_to_half(rgba[1]) << 16;
-   dw[6] = (uint32_t) util_float_to_half(rgba[2]) |
-           (uint32_t) util_float_to_half(rgba[3]) << 16;
-
-   /* clamp to [-1.0f, 1.0f] */
-   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
-   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
-   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
-   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
-
-   /* SNORM16 */
-   dw[9] =  (uint32_t) (uint16_t) util_iround(rgba[0] * 32767.0f) |
-            (uint32_t) (uint16_t) util_iround(rgba[1] * 32767.0f) << 16;
-   dw[10] = (uint32_t) (uint16_t) util_iround(rgba[2] * 32767.0f) |
-            (uint32_t) (uint16_t) util_iround(rgba[3] * 32767.0f) << 16;
-
-   /* SNORM8 */
-   dw[11] = (uint32_t) (uint8_t) util_iround(rgba[0] * 127.0f) |
-            (uint32_t) (uint8_t) util_iround(rgba[1] * 127.0f) << 8 |
-            (uint32_t) (uint8_t) util_iround(rgba[2] * 127.0f) << 16 |
-            (uint32_t) (uint8_t) util_iround(rgba[3] * 127.0f) << 24;
-
-   /* clamp to [0.0f, 1.0f] */
-   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
-   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
-   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
-   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
-
-   /* UNORM8 */
-   dw[0] = (uint32_t) (uint8_t) util_iround(rgba[0] * 255.0f) |
-           (uint32_t) (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
-           (uint32_t) (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
-           (uint32_t) (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
-
-   /* UNORM16 */
-   dw[7] = (uint32_t) (uint16_t) util_iround(rgba[0] * 65535.0f) |
-           (uint32_t) (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
-   dw[8] = (uint32_t) (uint16_t) util_iround(rgba[2] * 65535.0f) |
-           (uint32_t) (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
-}
-
-/**
- * Map a PIPE_TEX_MIPFILTER_x value to the corresponding GEN6_MIPFILTER_x
- * value.  An unrecognized value asserts and maps to GEN6_MIPFILTER_NONE.
- */
-static int
-gen6_translate_tex_mipfilter(unsigned filter)
-{
-   int hw_filter;
-
-   switch (filter) {
-   case PIPE_TEX_MIPFILTER_NEAREST:
-      hw_filter = GEN6_MIPFILTER_NEAREST;
-      break;
-   case PIPE_TEX_MIPFILTER_LINEAR:
-      hw_filter = GEN6_MIPFILTER_LINEAR;
-      break;
-   case PIPE_TEX_MIPFILTER_NONE:
-      hw_filter = GEN6_MIPFILTER_NONE;
-      break;
-   default:
-      assert(!"unknown mipfilter");
-      hw_filter = GEN6_MIPFILTER_NONE;
-      break;
-   }
-
-   return hw_filter;
-}
-
-/**
- * Map a PIPE_TEX_FILTER_x value to the corresponding GEN6_MAPFILTER_x value.
- * An unrecognized value asserts and maps to GEN6_MAPFILTER_NEAREST.
- */
-static int
-gen6_translate_tex_filter(unsigned filter)
-{
-   if (filter == PIPE_TEX_FILTER_NEAREST)
-      return GEN6_MAPFILTER_NEAREST;
-
-   if (filter == PIPE_TEX_FILTER_LINEAR)
-      return GEN6_MAPFILTER_LINEAR;
-
-   assert(!"unknown sampler filter");
-   return GEN6_MAPFILTER_NEAREST;
-}
-
-/**
- * Map a PIPE_TEX_WRAP_x value to the corresponding hardware texture
- * coordinate mode.  The mirror-clamp variants have no mapping here: like any
- * unrecognized value, they assert and map to GEN6_TEXCOORDMODE_WRAP.
- */
-static int
-gen6_translate_tex_wrap(unsigned wrap)
-{
-   int mode;
-
-   switch (wrap) {
-   case PIPE_TEX_WRAP_CLAMP:
-      mode = GEN8_TEXCOORDMODE_HALF_BORDER;
-      break;
-   case PIPE_TEX_WRAP_REPEAT:
-      mode = GEN6_TEXCOORDMODE_WRAP;
-      break;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      mode = GEN6_TEXCOORDMODE_CLAMP;
-      break;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-      break;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      mode = GEN6_TEXCOORDMODE_MIRROR;
-      break;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-   default:
-      assert(!"unknown sampler wrap mode");
-      mode = GEN6_TEXCOORDMODE_WRAP;
-      break;
-   }
-
-   return mode;
-}
-
-/**
- * Map a PIPE_FUNC_x shadow compare function to the corresponding
- * GEN6_COMPAREFUNCTION_x value.  An unrecognized value asserts and maps to
- * GEN6_COMPAREFUNCTION_NEVER.
- */
-static int
-gen6_translate_shadow_func(unsigned func)
-{
-   int hw_func;
-
-   /*
-    * The two conventions differ in two ways, which is why no entry maps to
-    * its namesake:
-    *
-    *  - PIPE_FUNC_x puts the reference value on the left-hand side of the
-    *    comparison and returns 1.0 when the comparison is true;
-    *  - GEN6_COMPAREFUNCTION_x puts the reference value on the right-hand
-    *    side and returns 0.0 when the comparison is true.
-    */
-   switch (func) {
-   case PIPE_FUNC_NEVER:
-      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
-      break;
-   case PIPE_FUNC_LESS:
-      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
-      break;
-   case PIPE_FUNC_EQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
-      break;
-   case PIPE_FUNC_LEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_LESS;
-      break;
-   case PIPE_FUNC_GREATER:
-      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
-      break;
-   case PIPE_FUNC_GEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_GREATER;
-      break;
-   case PIPE_FUNC_ALWAYS:
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   default:
-      assert(!"unknown shadow compare function");
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   }
-
-   return hw_func;
-}
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
-                         const struct pipe_sampler_state *state,
-                         struct ilo_sampler_cso *sampler)
-{
-   int mip_filter, min_filter, mag_filter, max_aniso;
-   int lod_bias, max_lod, min_lod;
-   int wrap_s, wrap_t, wrap_r, wrap_cube;
-   uint32_t dw0, dw1, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(sampler, 0, sizeof(*sampler));
-
-   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
-   min_filter = gen6_translate_tex_filter(state->min_img_filter);
-   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
-
-   sampler->anisotropic = state->max_anisotropy;
-
-   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
-      max_aniso = state->max_anisotropy / 2 - 1;
-   else if (state->max_anisotropy > 16)
-      max_aniso = GEN6_ANISORATIO_16;
-   else
-      max_aniso = GEN6_ANISORATIO_2;
-
-   /*
-    *
-    * Here is how the hardware calculates per-pixel LOD, from my reading of the
-    * PRMs:
-    *
-    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
-    *     other ways.  The number of texels is measured using level
-    *     SurfMinLod.
-    *  2) Bias is added to LOD.
-    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
-    *     compared with Base to determine whether magnification or
-    *     minification is needed.  (if preclamp is disabled, LOD is compared
-    *     with Base before clamping)
-    *  4) If magnification is needed, or no mipmapping is requested, LOD is
-    *     set to floor(MinLod).
-    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
-    *
-    * With Gallium interface, Base is always zero and
-    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
-    */
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      const float scale = 256.0f;
-
-      /* [-16.0, 16.0) in S4.8 */
-      lod_bias = (int)
-         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
-      lod_bias &= 0x1fff;
-
-      /* [0.0, 14.0] in U4.8 */
-      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
-      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
-   }
-   else {
-      const float scale = 64.0f;
-
-      /* [-16.0, 16.0) in S4.6 */
-      lod_bias = (int)
-         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
-      lod_bias &= 0x7ff;
-
-      /* [0.0, 13.0] in U4.6 */
-      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
-      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
-   }
-
-   /*
-    * We want LOD to be clamped to determine magnification/minification, and
-    * get set to zero when it is magnification or when mipmapping is disabled.
-    * The hardware would set LOD to floor(MinLod) and that is a problem when
-    * MinLod is greater than or equal to 1.0f.
-    *
-    * With Base being zero, it is always minification when MinLod is non-zero.
-    * To achieve our goal, we just need to set MinLod to zero and set
-    * MagFilter to MinFilter when mipmapping is disabled.
-    */
-   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
-      min_lod = 0;
-      mag_filter = min_filter;
-   }
-
-   /* determine wrap s/t/r */
-   wrap_s = gen6_translate_tex_wrap(state->wrap_s);
-   wrap_t = gen6_translate_tex_wrap(state->wrap_t);
-   wrap_r = gen6_translate_tex_wrap(state->wrap_r);
-   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
-      /*
-       * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
-       * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
-       * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
-       * additionally clamping the texture coordinates to [0.0, 1.0].
-       *
-       * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
-       * clamping has to be taken care of in the shaders.  There are two
-       * filters here, but let the minification one have a say.
-       */
-      const bool clamp_is_to_edge =
-         (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
-
-      if (clamp_is_to_edge) {
-         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_s = GEN6_TEXCOORDMODE_CLAMP;
-         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_t = GEN6_TEXCOORDMODE_CLAMP;
-         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_r = GEN6_TEXCOORDMODE_CLAMP;
-      } else {
-         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_s = true;
-         }
-         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_t = true;
-         }
-         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_r = true;
-         }
-      }
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
-    *
-    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
-    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
-    *      must have the same Address Control mode."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
-    *
-    *     "This field (Cube Surface Control Mode) must be set to
-    *      CUBECTRLMODE_PROGRAMMED"
-    *
-    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
-    * map filtering.
-    */
-   if (state->seamless_cube_map &&
-       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
-        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
-      wrap_cube = GEN6_TEXCOORDMODE_CUBE;
-   }
-   else {
-      wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
-   }
-
-   if (!state->normalized_coords) {
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
-       *
-       *     "The following state must be set as indicated if this field
-       *      (Non-normalized Coordinate Enable) is enabled:
-       *
-       *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
-       *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
-       *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
-       *      - Mag Mode Filter must be MAPFILTER_NEAREST or
-       *        MAPFILTER_LINEAR.
-       *      - Min Mode Filter must be MAPFILTER_NEAREST or
-       *        MAPFILTER_LINEAR.
-       *      - Mip Mode Filter must be MIPFILTER_NONE.
-       *      - Min LOD must be 0.
-       *      - Max LOD must be 0.
-       *      - MIP Count must be 0.
-       *      - Surface Min LOD must be 0.
-       *      - Texture LOD Bias must be 0."
-       */
-      assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-      assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-      assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-
-      assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
-             mag_filter == GEN6_MAPFILTER_LINEAR);
-      assert(min_filter == GEN6_MAPFILTER_NEAREST ||
-             min_filter == GEN6_MAPFILTER_LINEAR);
-
-      /* work around a bug in util_blitter */
-      mip_filter = GEN6_MIPFILTER_NONE;
-
-      assert(mip_filter == GEN6_MIPFILTER_NONE);
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      dw0 = 1 << 28 |
-            mip_filter << 20 |
-            lod_bias << 1;
-
-      sampler->dw_filter = mag_filter << 17 |
-                           min_filter << 14;
-
-      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
-                                 GEN6_MAPFILTER_ANISOTROPIC << 14 |
-                                 1;
-
-      dw1 = min_lod << 20 |
-            max_lod << 8;
-
-      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
-         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
-
-      dw3 = max_aniso << 19;
-
-      /* round the coordinates for linear filtering */
-      if (min_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
-      }
-      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
-      }
-
-      if (!state->normalized_coords)
-         dw3 |= 1 << 10;
-
-      sampler->dw_wrap = wrap_s << 6 |
-                         wrap_t << 3 |
-                         wrap_r;
-
-      /*
-       * As noted in the classic i965 driver, the HW may still reference
-       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
-       * mode
-       */
-      sampler->dw_wrap_1d = wrap_s << 6 |
-                            GEN6_TEXCOORDMODE_WRAP << 3 |
-                            GEN6_TEXCOORDMODE_WRAP;
-
-      sampler->dw_wrap_cube = wrap_cube << 6 |
-                              wrap_cube << 3 |
-                              wrap_cube;
-
-      STATIC_ASSERT(Elements(sampler->payload) >= 7);
-
-      sampler->payload[0] = dw0;
-      sampler->payload[1] = dw1;
-      sampler->payload[2] = dw3;
-
-      memcpy(&sampler->payload[3],
-            state->border_color.ui, sizeof(state->border_color.ui));
-   }
-   else {
-      dw0 = 1 << 28 |
-            mip_filter << 20 |
-            lod_bias << 3;
-
-      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
-         dw0 |= gen6_translate_shadow_func(state->compare_func);
-
-      sampler->dw_filter = (min_filter != mag_filter) << 27 |
-                           mag_filter << 17 |
-                           min_filter << 14;
-
-      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
-                                 GEN6_MAPFILTER_ANISOTROPIC << 14;
-
-      dw1 = min_lod << 22 |
-            max_lod << 12;
-
-      sampler->dw_wrap = wrap_s << 6 |
-                         wrap_t << 3 |
-                         wrap_r;
-
-      sampler->dw_wrap_1d = wrap_s << 6 |
-                            GEN6_TEXCOORDMODE_WRAP << 3 |
-                            GEN6_TEXCOORDMODE_WRAP;
-
-      sampler->dw_wrap_cube = wrap_cube << 6 |
-                              wrap_cube << 3 |
-                              wrap_cube;
-
-      dw3 = max_aniso << 19;
-
-      /* round the coordinates for linear filtering */
-      if (min_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
-      }
-      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
-      }
-
-      if (!state->normalized_coords)
-         dw3 |= 1;
-
-      STATIC_ASSERT(Elements(sampler->payload) >= 15);
-
-      sampler->payload[0] = dw0;
-      sampler->payload[1] = dw1;
-      sampler->payload[2] = dw3;
-
-      sampler_init_border_color_gen6(dev,
-            &state->border_color, &sampler->payload[3], 12);
-   }
-}