ilo: move emit functions so that they can be inlined.
author: Courtney Goeltzenleuchter <courtney@LunarG.com>
Mon, 5 Aug 2013 20:17:31 +0000 (14:17 -0600)
committer: Chia-I Wu <olvaffe@gmail.com>
Thu, 8 Aug 2013 03:39:21 +0000 (11:39 +0800)
src/gallium/drivers/ilo/ilo_gpe_gen6.c
src/gallium/drivers/ilo/ilo_gpe_gen6.h
src/gallium/drivers/ilo/ilo_gpe_gen7.c
src/gallium/drivers/ilo/ilo_gpe_gen7.h

index 1da2925d653753ca084f5452b1546b70976f5047..6059276f4d3a57a18f33b7c0f4256b5d1253eea3 100644 (file)
 #include "ilo_state.h"
 #include "ilo_gpe_gen6.h"
 
-/**
- * Translate winsys tiling to hardware tiling.
- */
-int
-ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
-{
-   switch (tiling) {
-   case INTEL_TILING_NONE:
-      return 0;
-   case INTEL_TILING_X:
-      return BRW_SURFACE_TILED;
-   case INTEL_TILING_Y:
-      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
-   default:
-      assert(!"unknown tiling");
-      return 0;
-   }
-}
-
-/**
- * Translate a pipe primitive type to the matching hardware primitive type.
- */
-int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
-{
-   static const int prim_mapping[PIPE_PRIM_MAX] = {
-      [PIPE_PRIM_POINTS]                     = _3DPRIM_POINTLIST,
-      [PIPE_PRIM_LINES]                      = _3DPRIM_LINELIST,
-      [PIPE_PRIM_LINE_LOOP]                  = _3DPRIM_LINELOOP,
-      [PIPE_PRIM_LINE_STRIP]                 = _3DPRIM_LINESTRIP,
-      [PIPE_PRIM_TRIANGLES]                  = _3DPRIM_TRILIST,
-      [PIPE_PRIM_TRIANGLE_STRIP]             = _3DPRIM_TRISTRIP,
-      [PIPE_PRIM_TRIANGLE_FAN]               = _3DPRIM_TRIFAN,
-      [PIPE_PRIM_QUADS]                      = _3DPRIM_QUADLIST,
-      [PIPE_PRIM_QUAD_STRIP]                 = _3DPRIM_QUADSTRIP,
-      [PIPE_PRIM_POLYGON]                    = _3DPRIM_POLYGON,
-      [PIPE_PRIM_LINES_ADJACENCY]            = _3DPRIM_LINELIST_ADJ,
-      [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = _3DPRIM_LINESTRIP_ADJ,
-      [PIPE_PRIM_TRIANGLES_ADJACENCY]        = _3DPRIM_TRILIST_ADJ,
-      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = _3DPRIM_TRISTRIP_ADJ,
-   };
-
-   assert(prim_mapping[prim]);
-
-   return prim_mapping[prim];
-}
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
-   switch (target) {
-   case PIPE_BUFFER:
-      return BRW_SURFACE_BUFFER;
-   case PIPE_TEXTURE_1D:
-   case PIPE_TEXTURE_1D_ARRAY:
-      return BRW_SURFACE_1D;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_RECT:
-   case PIPE_TEXTURE_2D_ARRAY:
-      return BRW_SURFACE_2D;
-   case PIPE_TEXTURE_3D:
-      return BRW_SURFACE_3D;
-   case PIPE_TEXTURE_CUBE:
-   case PIPE_TEXTURE_CUBE_ARRAY:
-      return BRW_SURFACE_CUBE;
-   default:
-      assert(!"unknown texture target");
-      return BRW_SURFACE_BUFFER;
-   }
-}
-
-/**
- * Translate a depth/stencil pipe format to the matching hardware
- * format.  Return -1 on errors.
- */
-static int
-gen6_translate_depth_format(enum pipe_format format)
-{
-   switch (format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      return BRW_DEPTHFORMAT_D16_UNORM;
-   case PIPE_FORMAT_Z32_FLOAT:
-      return BRW_DEPTHFORMAT_D32_FLOAT;
-   case PIPE_FORMAT_Z24X8_UNORM:
-      return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
-   default:
-      return -1;
-   }
-}
-
 /**
  * Translate a pipe logicop to the matching hardware logicop.
  */
@@ -294,28 +197,6 @@ gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
    }
 }
 
-/**
- * Translate a pipe DSA test function to the matching hardware compare
- * function.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
-   switch (func) {
-   case PIPE_FUNC_NEVER:      return BRW_COMPAREFUNCTION_NEVER;
-   case PIPE_FUNC_LESS:       return BRW_COMPAREFUNCTION_LESS;
-   case PIPE_FUNC_EQUAL:      return BRW_COMPAREFUNCTION_EQUAL;
-   case PIPE_FUNC_LEQUAL:     return BRW_COMPAREFUNCTION_LEQUAL;
-   case PIPE_FUNC_GREATER:    return BRW_COMPAREFUNCTION_GREATER;
-   case PIPE_FUNC_NOTEQUAL:   return BRW_COMPAREFUNCTION_NOTEQUAL;
-   case PIPE_FUNC_GEQUAL:     return BRW_COMPAREFUNCTION_GEQUAL;
-   case PIPE_FUNC_ALWAYS:     return BRW_COMPAREFUNCTION_ALWAYS;
-   default:
-      assert(!"unknown depth/stencil/alpha test function");
-      return BRW_COMPAREFUNCTION_NEVER;
-   }
-}
-
 /**
  * Translate a pipe shadow compare function to the matching hardware shadow
  * function.
@@ -345,520 +226,6 @@ gen6_translate_shadow_func(unsigned func)
    }
 }
 
-/**
- * Translate an index size to the matching hardware index format.
- */
-static int
-gen6_translate_index_size(int size)
-{
-   switch (size) {
-   case 4: return BRW_INDEX_DWORD;
-   case 2: return BRW_INDEX_WORD;
-   case 1: return BRW_INDEX_BYTE;
-   default:
-      assert(!"unknown index size");
-      return BRW_INDEX_BYTE;
-   }
-}
-
-static void
-gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
-                             struct intel_bo *general_state_bo,
-                             struct intel_bo *surface_state_bo,
-                             struct intel_bo *dynamic_state_bo,
-                             struct intel_bo *indirect_object_bo,
-                             struct intel_bo *instruction_bo,
-                             uint32_t general_state_size,
-                             uint32_t dynamic_state_size,
-                             uint32_t indirect_object_size,
-                             uint32_t instruction_size,
-                             struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
-   const uint8_t cmd_len = 10;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /* 4K-page aligned */
-   assert(((general_state_size | dynamic_state_size |
-            indirect_object_size | instruction_size) & 0xfff) == 0);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-
-   ilo_cp_write_bo(cp, 1, general_state_bo,
-                       INTEL_DOMAIN_RENDER,
-                       0);
-   ilo_cp_write_bo(cp, 1, surface_state_bo,
-                       INTEL_DOMAIN_SAMPLER,
-                       0);
-   ilo_cp_write_bo(cp, 1, dynamic_state_bo,
-                       INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
-                       0);
-   ilo_cp_write_bo(cp, 1, indirect_object_bo,
-                       0,
-                       0);
-   ilo_cp_write_bo(cp, 1, instruction_bo,
-                       INTEL_DOMAIN_INSTRUCTION,
-                       0);
-
-   if (general_state_size) {
-      ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
-                          INTEL_DOMAIN_RENDER,
-                          0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 1);
-   }
-
-   if (dynamic_state_size) {
-      ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
-                          INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
-                          0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 0xfffff000 + 1);
-   }
-
-   if (indirect_object_size) {
-      ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
-                          0,
-                          0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 0xfffff000 + 1);
-   }
-
-   if (instruction_size) {
-      ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
-                          INTEL_DOMAIN_INSTRUCTION,
-                          0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 1);
-   }
-
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
-                    uint32_t sip,
-                    struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
-   const uint8_t cmd_len = 2;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len | (cmd_len - 2));
-   ilo_cp_write(cp, cmd);
-   ilo_cp_write(cp, sip);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
-                                bool enable,
-                                struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
-   const uint8_t cmd_len = 1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | enable);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
-                          int pipeline,
-                          struct ilo_cp *cp)
-{
-   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
-   const uint8_t cmd_len = 1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /* 3D or media */
-   assert(pipeline == 0x0 || pipeline == 0x1);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | pipeline);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
-                          int max_threads, int num_urb_entries,
-                          int urb_entry_size,
-                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
-   const uint8_t cmd_len = 8;
-   uint32_t dw2, dw4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   dw2 = (max_threads - 1) << 16 |
-         num_urb_entries << 8 |
-         1 << 7 | /* Reset Gateway Timer */
-         1 << 6;  /* Bypass Gateway Control */
-
-   dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
-         480;                    /* CURBE Allocation Size */
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* MBZ */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, 0); /* scoreboard */
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
-                          uint32_t buf, int size,
-                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   assert(buf % 32 == 0);
-   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
-   size = align(size, 32);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0); /* MBZ */
-   ilo_cp_write(cp, size);
-   ilo_cp_write(cp, buf);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
-                                          uint32_t offset, int num_ids,
-                                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   assert(offset % 32 == 0);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0); /* MBZ */
-   /* every ID has 8 DWords */
-   ilo_cp_write(cp, num_ids * 8 * 4);
-   ilo_cp_write(cp, offset);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
-                              int id, int byte, int thread_count,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
-   const uint8_t cmd_len = 2;
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   dw1 = id << 16 |
-         byte << 8 |
-         thread_count;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
-                            int thread_count_water_mark,
-                            int barrier_mask,
-                            struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
-   const uint8_t cmd_len = 2;
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   dw1 = thread_count_water_mark << 16 |
-         barrier_mask;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
-                              struct ilo_cp *cp)
-{
-   assert(!"MEDIA_OBJECT_WALKER unsupported");
-}
-
-static void
-gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t vs_binding_table,
-                                         uint32_t gs_binding_table,
-                                         uint32_t ps_binding_table,
-                                         struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) |
-                    GEN6_BINDING_TABLE_MODIFY_VS |
-                    GEN6_BINDING_TABLE_MODIFY_GS |
-                    GEN6_BINDING_TABLE_MODIFY_PS);
-   ilo_cp_write(cp, vs_binding_table);
-   ilo_cp_write(cp, gs_binding_table);
-   ilo_cp_write(cp, ps_binding_table);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t vs_sampler_state,
-                                         uint32_t gs_sampler_state,
-                                         uint32_t ps_sampler_state,
-                                         struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) |
-                    VS_SAMPLER_STATE_CHANGE |
-                    GS_SAMPLER_STATE_CHANGE |
-                    PS_SAMPLER_STATE_CHANGE);
-   ilo_cp_write(cp, vs_sampler_state);
-   ilo_cp_write(cp, gs_sampler_state);
-   ilo_cp_write(cp, ps_sampler_state);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
-                      int vs_total_size, int gs_total_size,
-                      int vs_entry_size, int gs_entry_size,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
-   const uint8_t cmd_len = 3;
-   const int row_size = 128; /* 1024 bits */
-   int vs_alloc_size, gs_alloc_size;
-   int vs_num_entries, gs_num_entries;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   /* in 1024-bit URB rows */
-   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
-   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
-   /* the valid range is [1, 5] */
-   if (!vs_alloc_size)
-      vs_alloc_size = 1;
-   if (!gs_alloc_size)
-      gs_alloc_size = 1;
-   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
-   /* the valid range is [24, 256] in multiples of 4 */
-   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
-   if (vs_num_entries > 256)
-      vs_num_entries = 256;
-   assert(vs_num_entries >= 24);
-
-   /* the valid range is [0, 256] in multiples of 4 */
-   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
-   if (gs_num_entries > 256)
-      gs_num_entries = 256;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
-                    vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
-   ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
-                    (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
-                                 const struct ilo_ve_state *ve,
-                                 const struct ilo_vb_state *vb,
-                                 struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
-   uint8_t cmd_len;
-   unsigned hw_idx;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 82:
-    *
-    *     "From 1 to 33 VBs can be specified..."
-    */
-   assert(ve->vb_count <= 33);
-
-   if (!ve->vb_count)
-      return;
-
-   cmd_len = 1 + 4 * ve->vb_count;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-
-   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
-      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
-      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
-      const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
-      uint32_t dw;
-
-      dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
-
-      if (instance_divisor)
-         dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
-      else
-         dw |= GEN6_VB0_ACCESS_VERTEXDATA;
-
-      if (dev->gen >= ILO_GEN(7))
-         dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
-      /* use null vb if there is no buffer or the stride is out of range */
-      if (cso->buffer && cso->stride <= 2048) {
-         const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
-         const uint32_t start_offset = cso->buffer_offset;
-         /*
-          * As noted in ilo_translate_format(), we treat some 3-component
-          * formats as 4-component formats to work around hardware
-          * limitations.  Imagine the case where the vertex buffer holds a
-          * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
-          * The hardware would not be able to fetch it because the vertex
-          * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
-          * and that takes at least 8 bytes.
-          *
-          * For the workaround to work, we query the physical size, which is
-          * page aligned, to calculate end_offset so that the last vertex has
-          * a better chance to be fetched.
-          */
-         const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
-
-         dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
-
-         ilo_cp_write(cp, dw);
-         ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
-         ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
-         ilo_cp_write(cp, instance_divisor);
-      }
-      else {
-         dw |= 1 << 13;
-
-         ilo_cp_write(cp, dw);
-         ilo_cp_write(cp, 0);
-         ilo_cp_write(cp, 0);
-         ilo_cp_write(cp, instance_divisor);
-      }
-   }
-
-   ilo_cp_end(cp);
-}
-
-static void
-ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
-                    struct ilo_ve_cso *cso)
-{
-   int format;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
-    *
-    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
-    *        valid VERTEX_ELEMENT structure.
-    *
-    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
-    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
-    *
-    *      - The Source Element Format must be set to the UINT format.
-    *
-    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
-    *        primitives.  Software may elect to convert QUADLIST primitives
-    *        to some set of corresponding edge-flag-supported primitive
-    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
-    */
-
-   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
-   cso->payload[1] =
-         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
-         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
-         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
-         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
-
-   /*
-    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
-    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
-    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
-    *
-    * Since all the hardware cares about is whether the flags are zero or not,
-    * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
-    */
-   format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
-   if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
-      STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
-            BRW_SURFACEFORMAT_R32_FLOAT - 1);
-
-      cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
-   }
-   else {
-      assert(format == BRW_SURFACEFORMAT_R8_UINT);
-   }
-}
-
-static void
-ve_init_cso_with_components(const struct ilo_dev_info *dev,
-                            int comp0, int comp1, int comp2, int comp3,
-                            struct ilo_ve_cso *cso)
-{
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   STATIC_ASSERT(Elements(cso->payload) >= 2);
-   cso->payload[0] = GEN6_VE0_VALID;
-   cso->payload[1] =
-         comp0 << BRW_VE1_COMPONENT_0_SHIFT |
-         comp1 << BRW_VE1_COMPONENT_1_SHIFT |
-         comp2 << BRW_VE1_COMPONENT_2_SHIFT |
-         comp3 << BRW_VE1_COMPONENT_3_SHIFT;
-}
-
 static void
 ve_init_cso(const struct ilo_dev_info *dev,
             const struct pipe_vertex_element *state,
@@ -914,202 +281,29 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev,
 
    for (i = 0; i < num_states; i++) {
       const unsigned pipe_idx = states[i].vertex_buffer_index;
-      const unsigned instance_divisor = states[i].instance_divisor;
-      unsigned hw_idx;
-
-      /*
-       * map the pipe vb to the hardware vb, which has a fixed instance
-       * divisor
-       */
-      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
-         if (ve->vb_mapping[hw_idx] == pipe_idx &&
-             ve->instance_divisors[hw_idx] == instance_divisor)
-            break;
-      }
-
-      /* create one if there is no matching hardware vb */
-      if (hw_idx >= ve->vb_count) {
-         hw_idx = ve->vb_count++;
-
-         ve->vb_mapping[hw_idx] = pipe_idx;
-         ve->instance_divisors[hw_idx] = instance_divisor;
-      }
-
-      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
-   }
-}
-
-static void
-gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
-                                  const struct ilo_ve_state *ve,
-                                  bool last_velement_edgeflag,
-                                  bool prepend_generated_ids,
-                                  struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
-   uint8_t cmd_len;
-   unsigned i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
-    *
-    *     "Up to 34 (DevSNB+) vertex elements are supported."
-    */
-   assert(ve->count + prepend_generated_ids <= 34);
-
-   if (!ve->count && !prepend_generated_ids) {
-      struct ilo_ve_cso dummy;
-
-      ve_init_cso_with_components(dev,
-            BRW_VE1_COMPONENT_STORE_0,
-            BRW_VE1_COMPONENT_STORE_0,
-            BRW_VE1_COMPONENT_STORE_0,
-            BRW_VE1_COMPONENT_STORE_1_FLT,
-            &dummy);
-
-      cmd_len = 3;
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write_multi(cp, dummy.payload, 2);
-      ilo_cp_end(cp);
-
-      return;
-   }
-
-   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-
-   if (prepend_generated_ids) {
-      struct ilo_ve_cso gen_ids;
-
-      ve_init_cso_with_components(dev,
-            BRW_VE1_COMPONENT_STORE_VID,
-            BRW_VE1_COMPONENT_STORE_IID,
-            BRW_VE1_COMPONENT_NOSTORE,
-            BRW_VE1_COMPONENT_NOSTORE,
-            &gen_ids);
-
-      ilo_cp_write_multi(cp, gen_ids.payload, 2);
-   }
-
-   if (last_velement_edgeflag) {
-      struct ilo_ve_cso edgeflag;
-
-      for (i = 0; i < ve->count - 1; i++)
-         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
-
-      edgeflag = ve->cso[i];
-      ve_set_cso_edgeflag(dev, &edgeflag);
-      ilo_cp_write_multi(cp, edgeflag.payload, 2);
-   }
-   else {
-      for (i = 0; i < ve->count; i++)
-         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
-   }
-
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
-                               const struct ilo_ib_state *ib,
-                               bool enable_cut_index,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
-   const uint8_t cmd_len = 3;
-   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
-   uint32_t start_offset, end_offset;
-   int format;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (!buf)
-      return;
-
-   format = gen6_translate_index_size(ib->hw_index_size);
-
-   /*
-    * set start_offset to 0 here and adjust pipe_draw_info::start with
-    * ib->draw_start_offset in 3DPRIMITIVE
-    */
-   start_offset = 0;
-   end_offset = buf->bo_size;
-
-   /* end_offset must also be aligned and is inclusive */
-   end_offset -= (end_offset % ib->hw_index_size);
-   end_offset--;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) |
-                    ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
-                    format << 8);
-   ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
-   ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                          uint32_t clip_viewport,
-                                          uint32_t sf_viewport,
-                                          uint32_t cc_viewport,
-                                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) |
-                    GEN6_CLIP_VIEWPORT_MODIFY |
-                    GEN6_SF_VIEWPORT_MODIFY |
-                    GEN6_CC_VIEWPORT_MODIFY);
-   ilo_cp_write(cp, clip_viewport);
-   ilo_cp_write(cp, sf_viewport);
-   ilo_cp_write(cp, cc_viewport);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                    uint32_t blend_state,
-                                    uint32_t depth_stencil_state,
-                                    uint32_t color_calc_state,
-                                    struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+      const unsigned instance_divisor = states[i].instance_divisor;
+      unsigned hw_idx;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, blend_state | 1);
-   ilo_cp_write(cp, depth_stencil_state | 1);
-   ilo_cp_write(cp, color_calc_state | 1);
-   ilo_cp_end(cp);
-}
+      /*
+       * map the pipe vb to the hardware vb, which has a fixed instance
+       * divisor
+       */
+      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+         if (ve->vb_mapping[hw_idx] == pipe_idx &&
+             ve->instance_divisors[hw_idx] == instance_divisor)
+            break;
+      }
 
-static void
-gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t scissor_rect,
-                                         struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
-   const uint8_t cmd_len = 2;
+      /* create one if there is no matching hardware vb */
+      if (hw_idx >= ve->vb_count) {
+         hw_idx = ve->vb_count++;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7);
+         ve->vb_mapping[hw_idx] = pipe_idx;
+         ve->instance_divisors[hw_idx] = instance_divisor;
+      }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, scissor_rect);
-   ilo_cp_end(cp);
+      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
+   }
 }
 
 void
@@ -1189,48 +383,6 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
    cso->payload[2] = dw5;
 }
 
-static void
-gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *vs,
-                     int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
-   const uint8_t cmd_len = 6;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (!vs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   cso = ilo_shader_get_kernel_cso(vs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
-   dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_end(cp);
-}
-
 void
 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
                          const struct ilo_shader_state *gs,
@@ -1333,75 +485,6 @@ ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
    cso->payload[3] = dw6;
 }
 
-static void
-gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *gs,
-                     const struct ilo_shader_state *vs,
-                     int verts_per_prim,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
-   const uint8_t cmd_len = 7;
-   uint32_t dw1, dw2, dw4, dw5, dw6;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   if (gs) {
-      const struct ilo_shader_cso *cso;
-
-      dw1 = ilo_shader_get_kernel_offset(gs);
-
-      cso = ilo_shader_get_kernel_cso(gs);
-      dw2 = cso->payload[0];
-      dw4 = cso->payload[1];
-      dw5 = cso->payload[2];
-      dw6 = cso->payload[3];
-   }
-   else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
-      struct ilo_shader_cso cso;
-      enum ilo_kernel_param param;
-
-      switch (verts_per_prim) {
-      case 1:
-         param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
-         break;
-      case 2:
-         param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
-         break;
-      default:
-         param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
-         break;
-      }
-
-      dw1 = ilo_shader_get_kernel_offset(vs) +
-         ilo_shader_get_kernel_param(vs, param);
-
-      /* cannot use VS's CSO */
-      ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
-      dw2 = cso.payload[0];
-      dw4 = cso.payload[1];
-      dw5 = cso.payload[2];
-      dw6 = cso.payload[3];
-   }
-   else {
-      dw1 = 0;
-      dw2 = 0;
-      dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
-      dw5 = GEN6_GS_STATISTICS_ENABLE;
-      dw6 = 0;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, dw6);
-   ilo_cp_end(cp);
-}
-
 void
 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
                              const struct pipe_rasterizer_state *state,
@@ -1490,53 +573,6 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
       clip->can_enable_guardband = false;
 }
 
-/**
- * Emit 3DSTATE_CLIP.
- *
- * DW1 to DW3 start from the rasterizer's clip payload and are then adjusted
- * for the guardband test, for non-perspective barycentric use by the FS, and
- * for the number of viewports.  Without a rasterizer all three are zero.
- */
-static void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
-                       const struct ilo_rasterizer_state *rasterizer,
-                       const struct ilo_shader_state *fs,
-                       bool enable_guardband,
-                       int num_viewports,
-                       struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
-   const uint8_t cmd_len = 4;
-   uint32_t dw1 = 0, dw2 = 0, dw3 = 0;
-
-   if (rasterizer) {
-      const int nonperspective_modes =
-         1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
-         1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
-         1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
-      int fs_interps = 0;
-
-      dw1 = rasterizer->clip.payload[0];
-      dw2 = rasterizer->clip.payload[1];
-      dw3 = rasterizer->clip.payload[2];
-
-      /* the caller's request is honored only when the rasterizer allows it */
-      if (rasterizer->clip.can_enable_guardband && enable_guardband)
-         dw2 |= GEN6_CLIP_GB_TEST;
-
-      if (fs) {
-         fs_interps = ilo_shader_get_kernel_param(fs,
-               ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-      }
-
-      if (fs_interps & nonperspective_modes)
-         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
-
-      dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
-             (num_viewports - 1);
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, dw3);
-   ilo_cp_end(cp);
-}
-
 void
 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
                            const struct pipe_rasterizer_state *state,
@@ -1741,172 +777,6 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
    }
 }
 
-/**
- * Fill in DW2 to DW7 of 3DSTATE_SF.
- *
- * With a rasterizer, its SF payload is copied, the MSAA bits are added when
- * num_samples is greater than one, and on GEN7+ the depth buffer surface
- * format derived from depth_format is merged into the first dword.  Without
- * a rasterizer, the payload is zeroed except for the multisample
- * rasterization mode.
- */
-void
-ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
-                                    const struct ilo_rasterizer_state *rasterizer,
-                                    int num_samples,
-                                    enum pipe_format depth_format,
-                                    uint32_t *payload, unsigned payload_len)
-{
-   /* Elements() only needs the type; rasterizer is not dereferenced here */
-   assert(payload_len == Elements(rasterizer->sf.payload));
-
-   /*
-    * Test the rasterizer itself.  The address of its sf member cannot be
-    * used to tell whether a rasterizer was given.
-    */
-   if (rasterizer) {
-      const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
-
-      memcpy(payload, sf->payload, sizeof(sf->payload));
-
-      if (num_samples > 1)
-         payload[1] |= sf->dw_msaa;
-
-      if (dev->gen >= ILO_GEN(7)) {
-         int format;
-
-         /* separate stencil */
-         switch (depth_format) {
-         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-            depth_format = PIPE_FORMAT_Z24X8_UNORM;
-            break;
-         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-            depth_format = PIPE_FORMAT_Z32_FLOAT;
-            break;
-         case PIPE_FORMAT_S8_UINT:
-            depth_format = PIPE_FORMAT_NONE;
-            break;
-         default:
-            break;
-         }
-
-         format = gen6_translate_depth_format(depth_format);
-         /* FLOAT surface is assumed when there is no depth buffer */
-         if (format < 0)
-            format = BRW_DEPTHFORMAT_D32_FLOAT;
-
-         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
-      }
-   }
-   else {
-      payload[0] = 0;
-      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
-      payload[2] = 0;
-      payload[3] = 0;
-      payload[4] = 0;
-      payload[5] = 0;
-   }
-}
-
-/**
- * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
- *
- * dw[0] holds the attribute count, the URB read length and offset, the
- * swizzle enable and the point sprite origin.  dw[1] to dw[8] receive the
- * attribute swizzles, dw[9] and dw[10] the point sprite and constant
- * interpolation enables, and dw[11] and dw[12] are zero.  Without an FS,
- * everything is zero except for a URB read length of one.
- */
-void
-ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
-                                 const struct ilo_rasterizer_state *rasterizer,
-                                 const struct ilo_shader_state *fs,
-                                 const struct ilo_shader_state *last_sh,
-                                 uint32_t *dw, int num_dwords)
-{
-   const struct ilo_kernel_routing *routing;
-   int num_attrs, read_offset, read_len;
-   uint32_t dw0;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-   assert(num_dwords == 13);
-
-   if (!fs) {
-      memset(dw, 0, sizeof(dw[0]) * num_dwords);
-
-      dw[0] = (dev->gen >= ILO_GEN(7)) ?
-         1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT :
-         1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
-
-      return;
-   }
-
-   num_attrs = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
-   assert(num_attrs <= 32);
-
-   routing = ilo_shader_get_kernel_routing(fs);
-
-   /* the skip count must be even; it is programmed halved */
-   read_offset = routing->source_skip;
-   assert(read_offset % 2 == 0);
-   read_offset = read_offset / 2;
-
-   /* the length is halved, rounding up, and is never programmed as zero */
-   read_len = (routing->source_len + 1) / 2;
-   if (read_len == 0)
-      read_len = 1;
-
-   if (dev->gen >= ILO_GEN(7)) {
-      dw0 = num_attrs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
-            read_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
-            read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
-      if (routing->swizzle_enable)
-         dw0 |= GEN7_SBE_SWIZZLE_ENABLE;
-   }
-   else {
-      dw0 = num_attrs << GEN6_SF_NUM_OUTPUTS_SHIFT |
-            read_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
-            read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
-      if (routing->swizzle_enable)
-         dw0 |= GEN6_SF_SWIZZLE_ENABLE;
-   }
-
-   /* other sprite coordinate modes add no bits */
-   if (rasterizer->state.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT)
-      dw0 |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
-   else if (rasterizer->state.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
-      dw0 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
-
-   dw[0] = dw0;
-
-   /* 32 bytes of swizzles fill dw[1] to dw[8] */
-   STATIC_ASSERT(Elements(routing->swizzles) >= 16);
-   memcpy(&dw[1], routing->swizzles, 16 * 2);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
-    *
-    *     "This field (Point Sprite Texture Coordinate Enable) must be
-    *      programmed to 0 when non-point primitives are rendered."
-    *
-    * TODO We do not check that yet.
-    */
-   dw[9] = routing->point_sprite_enable;
-
-   dw[10] = routing->const_interp_enable;
-
-   /* WrapShortest enables */
-   dw[11] = 0;
-   dw[12] = 0;
-}
-
-/**
- * Emit 3DSTATE_SF.  Only GEN6 is accepted.
- *
- * The command interleaves two separately generated payloads: the first SBE
- * dword, then the six raster dwords, then the remaining twelve SBE dwords.
- * The raster payload is always generated for one sample and no depth format.
- */
-static void
-gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     const struct ilo_shader_state *fs,
-                     const struct ilo_shader_state *last_sh,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
-   const uint8_t cmd_len = 20;
-   uint32_t raster[6];
-   uint32_t sbe[13];
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
-         1, PIPE_FORMAT_NONE, raster, Elements(raster));
-
-   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
-         fs, last_sh, sbe, Elements(sbe));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   /* DW1 */
-   ilo_cp_write(cp, sbe[0]);
-   /* DW2 to DW7 */
-   ilo_cp_write_multi(cp, raster, 6);
-   /* DW8 to DW19 */
-   ilo_cp_write_multi(cp, sbe + 1, 12);
-   ilo_cp_end(cp);
-}
-
 void
 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
                                 const struct pipe_rasterizer_state *state,
@@ -1975,364 +845,79 @@ ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
 
    dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
 
-   dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
-         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
-         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
-
-   dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
-    *      PS kernel or color calculator has the ability to kill (discard)
-    *      pixels or samples, other than due to depth or stencil testing.
-    *      This bit is required to be ENABLED in the following situations:
-    *
-    *      The API pixel shader program contains "killpix" or "discard"
-    *      instructions, or other code in the pixel shader kernel that can
-    *      cause the final pixel mask to differ from the pixel mask received
-    *      on dispatch.
-    *
-    *      A sampler with chroma key enabled with kill pixel mode is used by
-    *      the pixel shader.
-    *
-    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
-    *      enabled.
-    *
-    *      The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware and
-    *      therefore not via PS instructions, there should be no need to
-    *      ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw5 |= GEN6_WM_KILL_ENABLE;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
-    *      field must be set to disabled."
-    *
-    * TODO This is not checked yet.
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw5 |= GEN6_WM_COMPUTED_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw5 |= GEN6_WM_USES_SOURCE_W;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    */
-   if (true)
-      dw5 |= GEN6_WM_DISPATCH_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
-
-   dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
-         GEN6_WM_POSOFFSET_NONE |
-         interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = dw6;
-}
-
-/**
- * Emit 3DSTATE_WM.  Only GEN6 is accepted.
- *
- * Without an FS, every dword is zero except for the maximum thread count.
- * With an FS, the dwords start from the FS CSO payload and are combined with
- * the sampler count, the statistics bit, the color calculator kill and dual
- * source blend flags, and the rasterizer's WM payload.
- */
-static void
-gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *fs,
-                     int num_samplers,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     bool dual_blend, bool cc_may_kill,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
-   const uint8_t cmd_len = 9;
-   const int num_samples = 1;
-   uint32_t dw[9];
-   int i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   dw[0] = cmd | (cmd_len - 2);
-   dw[3] = 0; /* scratch */
-   dw[7] = 0; /* kernel 1 */
-   dw[8] = 0; /* kernel 2 */
-
-   if (fs) {
-      const struct ilo_shader_cso *cso = ilo_shader_get_kernel_cso(fs);
-
-      dw[1] = ilo_shader_get_kernel_offset(fs);
-
-      dw[2] = cso->payload[0];
-      dw[2] |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 248:
-       *
-       *     "This bit (Statistics Enable) must be disabled if either of these
-       *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
-       *      Resolve Enable or Depth Buffer Resolve Enable."
-       *
-       * None of GEN6_WM_DEPTH_CLEAR, GEN6_WM_DEPTH_RESOLVE, or
-       * GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE is set here, so statistics are
-       * always enabled.
-       */
-      dw[4] = cso->payload[1];
-      dw[4] |= GEN6_WM_STATISTICS_ENABLE;
-
-      dw[5] = cso->payload[2];
-      if (cc_may_kill) {
-         dw[5] |= GEN6_WM_KILL_ENABLE |
-                  GEN6_WM_DISPATCH_ENABLE;
-      }
-      if (dual_blend)
-         dw[5] |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
-      dw[5] |= rasterizer->wm.payload[0];
-
-      dw[6] = cso->payload[3];
-      dw[6] |= rasterizer->wm.payload[1];
-      if (num_samples > 1) {
-         dw[6] |= rasterizer->wm.dw_msaa_rast |
-                  rasterizer->wm.dw_msaa_disp;
-      }
-   }
-   else {
-      /* see brwCreateContext() */
-      const int max_threads = (dev->gt == 2) ? 80 : 40;
-
-      dw[1] = 0;
-      dw[2] = 0;
-      dw[4] = 0;
-      /* honor the valid range even if dispatching is disabled */
-      dw[5] = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
-      dw[6] = 0;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   for (i = 0; i < cmd_len; i++)
-      ilo_cp_write(cp, dw[i]);
-   ilo_cp_end(cp);
-}
-
-/**
- * Fill in the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
- *
- * Each of the first num_bufs buffers with a non-zero size gets its address
- * combined with its read length (in 256-bit units, minus one); every other
- * slot is zeroed.  The sum of the actual read lengths is asserted against
- * max_read_length.
- *
- * \return a bitmask with bit N set when slot N is enabled
- */
-static unsigned
-gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
-                           const uint32_t *bufs, const int *sizes,
-                           int num_bufs, int max_read_length,
-                           uint32_t *dw, int num_dwords)
-{
-   unsigned enable_mask = 0x0;
-   int read_total = 0;
-   int slot;
-
-   assert(num_dwords == 4);
-
-   for (slot = 0; slot < 4; slot++) {
-      int read_len;
-
-      /* unused slot */
-      if (slot >= num_bufs || !sizes[slot]) {
-         dw[slot] = 0;
-         continue;
-      }
-
-      /* in 256-bit units minus one */
-      read_len = (sizes[slot] + 31) / 32 - 1;
-
-      assert(bufs[slot] % 32 == 0);
-      assert(read_len < 32);
-
-      dw[slot] = bufs[slot] | read_len;
-      enable_mask |= 1 << slot;
-
-      read_total += read_len + 1;
-   }
-
-   assert(read_total <= max_read_length);
-
-   return enable_mask;
-}
-
-static void
-gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
-   const uint8_t cmd_len = 5;
-   uint32_t buf_dw[4], buf_enabled;
+   dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
+         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
+         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_bufs <= 4);
+   dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
    /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+    *      PS kernel or color calculator has the ability to kill (discard)
+    *      pixels or samples, other than due to depth or stencil testing.
+    *      This bit is required to be ENABLED in the following situations:
+    *
+    *      The API pixel shader program contains "killpix" or "discard"
+    *      instructions, or other code in the pixel shader kernel that can
+    *      cause the final pixel mask to differ from the pixel mask received
+    *      on dispatch.
+    *
+    *      A sampler with chroma key enabled with kill pixel mode is used by
+    *      the pixel shader.
+    *
+    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
+    *      enabled.
     *
-    *     "The sum of all four read length fields (each incremented to
-    *      represent the actual read length) must be less than or equal to 32"
+    *      The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware and
+    *      therefore not via PS instructions, there should be no need to
+    *      ENABLE this bit due to ClipDistance clipping."
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
-         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
-   const uint8_t cmd_len = 5;
-   uint32_t buf_dw[4], buf_enabled;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_bufs <= 4);
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+      dw5 |= GEN6_WM_KILL_ENABLE;
 
    /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+    *      field must be set to disabled."
     *
-    *     "The sum of all four read length fields (each incremented to
-    *      represent the actual read length) must be less than or equal to 64"
+    * TODO This is not checked yet.
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
-         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
-}
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+      dw5 |= GEN6_WM_COMPUTED_DEPTH;
 
-static void
-gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
-   const uint8_t cmd_len = 5;
-   uint32_t buf_dw[4], buf_enabled;
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+      dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(num_bufs <= 4);
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+      dw5 |= GEN6_WM_USES_SOURCE_W;
 
    /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+    * TODO set this bit only when
     *
-    *     "The sum of all four read length fields (each incremented to
-    *      represent the actual read length) must be less than or equal to 64"
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
-         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_SAMPLE_MASK.  Only GEN6 is accepted.
- *
- * Only the low four bits of sample_mask are emitted.
- */
-static void
-gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
-                              unsigned sample_mask,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
-   const uint8_t cmd_len = 2;
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   dw1 = sample_mask & 0xf;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
-}
-
-static void
-gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
-                                    unsigned x, unsigned y,
-                                    unsigned width, unsigned height,
-                                    struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
-   const uint8_t cmd_len = 4;
-   unsigned xmax = x + width - 1;
-   unsigned ymax = y + height - 1;
-   int rect_limit;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (dev->gen >= ILO_GEN(7)) {
-      rect_limit = 16383;
-   }
-   else {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
-       *
-       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
-       *      must be an even number"
-       */
-      assert(y % 2 == 0);
-
-      rect_limit = 8191;
-   }
-
-   if (x > rect_limit) x = rect_limit;
-   if (y > rect_limit) y = rect_limit;
-   if (xmax > rect_limit) xmax = rect_limit;
-   if (ymax > rect_limit) ymax = rect_limit;
+   if (true)
+      dw5 |= GEN6_WM_DISPATCH_ENABLE;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, y << 16 | x);
-   ilo_cp_write(cp, ymax << 16 | xmax);
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
 
-   /*
-    * There is no need to set the origin.  It is intended to support front
-    * buffer rendering.
-    */
-   ilo_cp_write(cp, 0);
+   dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
+         GEN6_WM_POSOFFSET_NONE |
+         interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
-   ilo_cp_end(cp);
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = dw6;
 }
 
 struct ilo_zs_surface_info {
@@ -2712,441 +1297,39 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
    zs->payload[4] = dw5;
    zs->payload[5] = dw6;
 
-   /* do not increment reference count */
-   zs->bo = info.zs.bo;
-
-   /* separate stencil */
-   if (info.stencil.bo) {
-      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
-             info.stencil.stride % 128 == 0);
-
-      zs->payload[6] = info.stencil.stride - 1;
-      zs->payload[7] = info.stencil.offset;
-
-      /* do not increment reference count */
-      zs->separate_s8_bo = info.stencil.bo;
-   }
-   else {
-      zs->payload[6] = 0;
-      zs->payload[7] = 0;
-      zs->separate_s8_bo = NULL;
-   }
-
-   /* hiz */
-   if (info.hiz.bo) {
-      zs->payload[8] = info.hiz.stride - 1;
-      zs->payload[9] = info.hiz.offset;
-
-      /* do not increment reference count */
-      zs->hiz_bo = info.hiz.bo;
-   }
-   else {
-      zs->payload[8] = 0;
-      zs->payload[9] = 0;
-      zs->hiz_bo = NULL;
-   }
-}
-
-/**
- * Emit 3DSTATE_DEPTH_BUFFER from the first six dwords of the ZS surface
- * payload.  GEN6 and GEN7 are accepted; they use different opcodes.
- */
-static void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                               const struct ilo_zs_surface *zs,
-                               struct ilo_cp *cp)
-{
-   const uint8_t cmd_len = 7;
-   uint32_t cmd;
-   int i;
-
-   if (dev->gen >= ILO_GEN(7))
-      cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
-   else
-      cmd = ILO_GPE_CMD(0x3, 0x1, 0x05);
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, zs->payload[0]);
-   /* DW2 goes through ilo_cp_write_bo() together with the depth bo */
-   ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
-         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-   for (i = 2; i <= 5; i++)
-      ilo_cp_write(cp, zs->payload[i]);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_POLY_STIPPLE_OFFSET.  GEN6 and GEN7 are accepted.
- *
- * Both offsets are asserted to be in [0, 31]; x_offset is placed eight bits
- * above y_offset.
- */
-static void
-gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
-                                      int x_offset, int y_offset,
-                                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
-   const uint8_t cmd_len = 2;
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-   assert(x_offset >= 0 && x_offset <= 31);
-   assert(y_offset >= 0 && y_offset <= 31);
-
-   dw1 = x_offset << 8 | y_offset;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_POLY_STIPPLE_PATTERN.  GEN6 and GEN7 are accepted.
- *
- * The 32 stipple dwords follow the command header in order.
- */
-static void
-gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
-                                       const struct pipe_poly_stipple *pattern,
-                                       struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
-   const uint8_t cmd_len = 33;
-   int row = 0;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-   assert(Elements(pattern->stipple) == 32);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   while (row < 32) {
-      ilo_cp_write(cp, pattern->stipple[row]);
-      row++;
-   }
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_LINE_STIPPLE.  GEN6 and GEN7 are accepted.
- *
- * pattern is asserted to fit in 16 bits and factor to be in [1, 256].  The
- * last dword combines the repeat factor with its inverse; the inverse uses a
- * different fixed-point format and bit position on GEN7 than on GEN6.
- */
-static void
-gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
-                               unsigned pattern, unsigned factor,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
-   const uint8_t cmd_len = 3;
-   uint32_t dw2;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-   assert((pattern & 0xffff) == pattern);
-   assert(factor >= 1 && factor <= 256);
-
-   if (dev->gen >= ILO_GEN(7)) {
-      /* in U1.16 */
-      const unsigned inverse = (unsigned) (65536.0f / factor);
-
-      dw2 = inverse << 15 | factor;
-   }
-   else {
-      /* in U1.13 */
-      const unsigned inverse = (unsigned) (8192.0f / factor);
-
-      dw2 = inverse << 16 | factor;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, pattern);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_AA_LINE_PARAMETERS with both parameter dwords set to zero.
- * GEN6 and GEN7 are accepted.
- */
-static void
-gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
-                                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
-   const uint8_t cmd_len = 3;
-   /* each dword packs two fields, one of them at bit 16; all are zero */
-   const uint32_t dw1 = 0 << 16 | 0;
-   const uint32_t dw2 = 0 << 16 | 0;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_GS_SVB_INDEX.  Only GEN6 is accepted.
- *
- * index selects one of four slots (asserted to be in [0, 3]); svbi and
- * max_svbi are emitted unchanged as the last two dwords.
- */
-static void
-gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
-                               int index, unsigned svbi,
-                               unsigned max_svbi,
-                               bool load_vertex_count,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
-   const uint8_t cmd_len = 4;
-   uint32_t header;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-   assert(index >= 0 && index < 4);
-
-   header = index << SVB_INDEX_SHIFT;
-   if (load_vertex_count)
-      header |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, header);
-   ilo_cp_write(cp, svbi);
-   ilo_cp_write(cp, max_svbi);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_MULTISAMPLE.  GEN6 and GEN7 are accepted.
- *
- * The command is three dwords long on GEN6 and four on GEN7.  Sample counts
- * of 0 and 1 are treated alike; 4 uses the first packed position dword; 8
- * uses both and is asserted to be GEN7+.  Any other count asserts and is
- * programmed as a single sample.
- */
-static void
-gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
-                              int num_samples,
-                              const uint32_t *packed_sample_pos,
-                              bool pixel_location_center,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
-   const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
-   uint32_t dw1;
-   uint32_t dw2 = 0;
-   uint32_t dw3 = 0;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (pixel_location_center)
-      dw1 = MS_PIXEL_LOCATION_CENTER;
-   else
-      dw1 = MS_PIXEL_LOCATION_UPPER_LEFT;
-
-   if (num_samples == 4) {
-      dw1 |= MS_NUMSAMPLES_4;
-      dw2 = packed_sample_pos[0];
-   }
-   else if (num_samples == 8) {
-      assert(dev->gen >= ILO_GEN(7));
-      dw1 |= MS_NUMSAMPLES_8;
-      dw2 = packed_sample_pos[0];
-      dw3 = packed_sample_pos[1];
-   }
-   else {
-      /* 0 and 1 are the only other valid counts */
-      if (num_samples != 0 && num_samples != 1)
-         assert(!"unsupported sample count");
-
-      dw1 |= MS_NUMSAMPLES_1;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   if (dev->gen >= ILO_GEN(7))
-      ilo_cp_write(cp, dw3);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_STENCIL_BUFFER from payload[6] and payload[7] of the ZS
- * surface and its separate stencil bo.  GEN6 and GEN7 are accepted; they use
- * different opcodes.
- */
-static void
-gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
-                                 const struct ilo_zs_surface *zs,
-                                 struct ilo_cp *cp)
-{
-   const uint8_t cmd_len = 3;
-   uint32_t cmd;
-
-   if (dev->gen >= ILO_GEN(7))
-      cmd = ILO_GPE_CMD(0x3, 0x0, 0x06);
-   else
-      cmd = ILO_GPE_CMD(0x3, 0x1, 0x0e);
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-
-   /* see ilo_gpe_init_zs_surface() */
-   ilo_cp_write(cp, zs->payload[6]);
-   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
-         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_HIER_DEPTH_BUFFER from payload[8] and payload[9] of the ZS
- * surface and its HiZ bo.  GEN6 and GEN7 are accepted; they use different
- * opcodes.
- */
-static void
-gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                                    const struct ilo_zs_surface *zs,
-                                    struct ilo_cp *cp)
-{
-   const uint8_t cmd_len = 3;
-   uint32_t cmd;
-
-   if (dev->gen >= ILO_GEN(7))
-      cmd = ILO_GPE_CMD(0x3, 0x0, 0x07);
-   else
-      cmd = ILO_GPE_CMD(0x3, 0x1, 0x0f);
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-
-   /* see ilo_gpe_init_zs_surface() */
-   ilo_cp_write(cp, zs->payload[8]);
-   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
-         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DSTATE_CLEAR_PARAMS.  Only GEN6 is accepted.
- *
- * GEN5_DEPTH_CLEAR_VALID is always set in the header, and clear_val is
- * emitted unchanged.
- */
-static void
-gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
-                               uint32_t clear_val,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
-   const uint8_t cmd_len = 2;
-   uint32_t header;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   header = cmd | (cmd_len - 2) |
-            GEN5_DEPTH_CLEAR_VALID;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, header);
-   ilo_cp_write(cp, clear_val);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit PIPE_CONTROL.  GEN6 and GEN7 are accepted.
- *
- * dw1 carries the control flags and is checked against two PRM restrictions
- * before it is emitted.  The command is five dwords long when write_qword is
- * set and four otherwise; the data dwords are always zero.
- */
-static void
-gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
-                       uint32_t dw1,
-                       struct intel_bo *bo, uint32_t bo_offset,
-                       bool write_qword,
-                       struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
-   const uint8_t cmd_len = (write_qword) ? 5 : 4;
-   const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
-   const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (dw1 & PIPE_CONTROL_CS_STALL) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
-       *
-       *     "1 of the following must also be set (when CS stall is set):
-       *
-       *       * Depth Cache Flush Enable ([0] of DW1)
-       *       * Stall at Pixel Scoreboard ([1] of DW1)
-       *       * Depth Stall ([13] of DW1)
-       *       * Post-Sync Operation ([13] of DW1)
-       *       * Render Target Cache Flush Enable ([12] of DW1)
-       *       * Notify Enable ([8] of DW1)"
-       *
-       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
-       *
-       *     "One of the following must also be set (when CS stall is set):
-       *
-       *       * Render Target Cache Flush Enable ([12] of DW1)
-       *       * Depth Cache Flush Enable ([0] of DW1)
-       *       * Stall at Pixel Scoreboard ([1] of DW1)
-       *       * Depth Stall ([13] of DW1)
-       *       * Post-Sync Operation ([13] of DW1)"
-       */
-      uint32_t companions;
-
-      /* flushes and stalls */
-      companions = PIPE_CONTROL_WRITE_FLUSH |
-                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
-                   PIPE_CONTROL_DEPTH_STALL;
-
-      /* post-sync op */
-      companions |= PIPE_CONTROL_WRITE_IMMEDIATE |
-                    PIPE_CONTROL_WRITE_DEPTH_COUNT |
-                    PIPE_CONTROL_WRITE_TIMESTAMP;
-
-      /* accepted as a companion bit on GEN6 only */
-      if (dev->gen == ILO_GEN(6))
-         companions |= PIPE_CONTROL_INTERRUPT_ENABLE;
-
-      assert(dw1 & companions);
-   }
-
-   if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
-       *
-       *     "Following bits must be clear (when Depth Stall is set):
-       *
-       *       * Render Target Cache Flush Enable ([12] of DW1)
-       *       * Depth Cache Flush Enable ([0] of DW1)"
-       */
-      const uint32_t forbidden = PIPE_CONTROL_WRITE_FLUSH |
-                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-
-      assert(!(dw1 & forbidden));
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
-   ilo_cp_write(cp, 0);
-   if (write_qword)
-      ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-/**
- * Emit 3DPRIMITIVE.  Only GEN6 is accepted.
- *
- * The topology is a rectangle list when rectlist is set and the translation
- * of info->mode otherwise.  Indexed draws use random vertex buffer access and
- * add the index buffer's draw_start_offset to the start vertex.
- */
-static void
-gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
-                      const struct pipe_draw_info *info,
-                      const struct ilo_ib_state *ib,
-                      bool rectlist,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
-   const uint8_t cmd_len = 6;
-   int topology, access_mode;
-   uint32_t first_vertex;
-
-   if (rectlist)
-      topology = _3DPRIM_RECTLIST;
-   else
-      topology = ilo_gpe_gen6_translate_pipe_prim(info->mode);
-
-   first_vertex = info->start;
-   if (info->indexed) {
-      access_mode = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
-      first_vertex += ib->draw_start_offset;
-   }
-   else {
-      access_mode = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
-   }
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2) |
-                    topology << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
-                    access_mode);
-   ilo_cp_write(cp, info->count);
-   ilo_cp_write(cp, first_vertex);
-   ilo_cp_write(cp, info->instance_count);
-   ilo_cp_write(cp, info->start_instance);
-   ilo_cp_write(cp, info->index_bias);
-   ilo_cp_end(cp);
-}
-
-static uint32_t
-gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
-                                    const struct ilo_shader_state **cs,
-                                    uint32_t *sampler_state,
-                                    int *num_samplers,
-                                    uint32_t *binding_table_state,
-                                    int *num_surfaces,
-                                    int num_ids,
-                                    struct ilo_cp *cp)
-{
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
-    *
-    *     "(Interface Descriptor Total Length) This field must have the same
-    *      alignment as the Interface Descriptor Data Start Address.
-    *
-    *      It must be DQWord (32-byte) aligned..."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
-    *
-    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
-    *      aligned address of the Interface Descriptor data."
-    */
-   const int state_align = 32 / 4;
-   const int state_len = (32 / 4) * num_ids;
-   uint32_t state_offset, *dw;
-   int i;
+   /* do not increment reference count */
+   zs->bo = info.zs.bo;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   /* separate stencil */
+   if (info.stencil.bo) {
+      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
+             info.stencil.stride % 128 == 0);
+
+      zs->payload[6] = info.stencil.stride - 1;
+      zs->payload[7] = info.stencil.offset;
 
-   dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_ids; i++) {
-      dw[0] = ilo_shader_get_kernel_offset(cs[i]);
-      dw[1] = 1 << 18; /* SPF */
-      dw[2] = sampler_state[i] |
-              (num_samplers[i] + 3) / 4 << 2;
-      dw[3] = binding_table_state[i] |
-              num_surfaces[i];
-      dw[4] = 0 << 16 |  /* CURBE Read Length */
-              0;         /* CURBE Read Offset */
-      dw[5] = 0; /* Barrier ID */
-      dw[6] = 0;
-      dw[7] = 0;
-
-      dw += 8;
+      /* do not increment reference count */
+      zs->separate_s8_bo = info.stencil.bo;
+   }
+   else {
+      zs->payload[6] = 0;
+      zs->payload[7] = 0;
+      zs->separate_s8_bo = NULL;
    }
 
-   return state_offset;
+   /* hiz */
+   if (info.hiz.bo) {
+      zs->payload[8] = info.hiz.stride - 1;
+      zs->payload[9] = info.hiz.offset;
+
+      /* do not increment reference count */
+      zs->hiz_bo = info.hiz.bo;
+   }
+   else {
+      zs->payload[8] = 0;
+      zs->payload[9] = 0;
+      zs->hiz_bo = NULL;
+   }
 }
 
 static void
@@ -3248,149 +1431,6 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
    vp->max_z = scale_z *  1.0f + state->translate[2];
 }
 
-static uint32_t
-gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
-                      const struct ilo_viewport_cso *viewports,
-                      unsigned num_viewports,
-                      struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 8 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
-    *
-    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
-    *      stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->m00);
-      dw[1] = fui(vp->m11);
-      dw[2] = fui(vp->m22);
-      dw[3] = fui(vp->m30);
-      dw[4] = fui(vp->m31);
-      dw[5] = fui(vp->m32);
-      dw[6] = 0;
-      dw[7] = 0;
-
-      dw += 8;
-   }
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
-                        const struct ilo_viewport_cso *viewports,
-                        unsigned num_viewports,
-                        struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 4 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
-    *
-    *     "The viewport-related state is stored as an array of up to 16
-    *      elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->min_gbx);
-      dw[1] = fui(vp->max_gbx);
-      dw[2] = fui(vp->min_gby);
-      dw[3] = fui(vp->max_gby);
-
-      dw += 4;
-   }
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
-                      const struct ilo_viewport_cso *viewports,
-                      unsigned num_viewports,
-                      struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 2 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
-    *
-    *     "The viewport state is stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->min_z);
-      dw[1] = fui(vp->max_z);
-
-      dw += 2;
-   }
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
-                           const struct pipe_stencil_ref *stencil_ref,
-                           float alpha_ref,
-                           const struct pipe_blend_color *blend_color,
-                           struct ilo_cp *cp)
-{
-   const int state_align = 64 / 4;
-   const int state_len = 6;
-   uint32_t state_offset, *dw;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
-         state_len, state_align, &state_offset);
-
-   dw[0] = stencil_ref->ref_value[0] << 24 |
-           stencil_ref->ref_value[1] << 16 |
-           BRW_ALPHATEST_FORMAT_UNORM8;
-   dw[1] = float_to_ubyte(alpha_ref);
-   dw[2] = fui(blend_color->color[0]);
-   dw[3] = fui(blend_color->color[1]);
-   dw[4] = fui(blend_color->color[2]);
-   dw[5] = fui(blend_color->color[3]);
-
-   return state_offset;
-}
-
 static int
 gen6_blend_factor_dst_alpha_forced_one(int factor)
 {
@@ -3537,134 +1577,6 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev,
    }
 }
 
-static uint32_t
-gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
-                      const struct ilo_blend_state *blend,
-                      const struct ilo_fb_state *fb,
-                      const struct pipe_alpha_state *alpha,
-                      struct ilo_cp *cp)
-{
-   const int state_align = 64 / 4;
-   int state_len;
-   uint32_t state_offset, *dw;
-   unsigned num_targets, i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
-    *
-    *     "The blend state is stored as an array of up to 8 elements..."
-    */
-   num_targets = fb->state.nr_cbufs;
-   assert(num_targets <= 8);
-
-   if (!num_targets) {
-      if (!alpha->enabled)
-         return 0;
-      /* to be able to reference alpha func */
-      num_targets = 1;
-   }
-
-   state_len = 2 * num_targets;
-
-   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_targets; i++) {
-      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
-      const struct ilo_blend_cso *cso = &blend->cso[idx];
-      const int num_samples = fb->num_samples;
-      const struct util_format_description *format_desc =
-         (idx < fb->state.nr_cbufs) ?
-         util_format_description(fb->state.cbufs[idx]->format) : NULL;
-      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
-
-      rt_is_unorm = true;
-      rt_is_pure_integer = false;
-      rt_dst_alpha_forced_one = false;
-
-      if (format_desc) {
-         int ch;
-
-         switch (format_desc->format) {
-         case PIPE_FORMAT_B8G8R8X8_UNORM:
-            /* force alpha to one when the HW format has alpha */
-            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
-                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
-            rt_dst_alpha_forced_one = true;
-            break;
-         default:
-            break;
-         }
-
-         for (ch = 0; ch < 4; ch++) {
-            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
-               continue;
-
-            if (format_desc->channel[ch].pure_integer) {
-               rt_is_unorm = false;
-               rt_is_pure_integer = true;
-               break;
-            }
-
-            if (!format_desc->channel[ch].normalized ||
-                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
-               rt_is_unorm = false;
-         }
-      }
-
-      dw[0] = cso->payload[0];
-      dw[1] = cso->payload[1];
-
-      if (!rt_is_pure_integer) {
-         if (rt_dst_alpha_forced_one)
-            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
-         else
-            dw[0] |= cso->dw_blend;
-      }
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-       *
-       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
-       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
-       *
-       * Since logicop is ignored for non-UNORM color buffers, no special care
-       * is needed.
-       */
-      if (rt_is_unorm)
-         dw[1] |= cso->dw_logicop;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
-       *
-       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
-       *      Dither both must be disabled."
-       *
-       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
-       * requires that anyway.
-       */
-      if (num_samples > 1)
-         dw[1] |= cso->dw_alpha_mod;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
-       *
-       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
-       *      alpha value."
-       */
-      if (alpha->enabled && !rt_is_pure_integer) {
-         dw[1] |= 1 << 16 |
-                  gen6_translate_dsa_func(alpha->func) << 13;
-      }
-
-      dw += 2;
-   }
-
-   return state_offset;
-}
-
 void
 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
                  const struct pipe_depth_stencil_alpha_state *state,
@@ -3748,28 +1660,6 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
       dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
 }
 
-static uint32_t
-gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
-                              const struct ilo_dsa_state *dsa,
-                              struct ilo_cp *cp)
-{
-   const int state_align = 64 / 4;
-   const int state_len = 3;
-   uint32_t state_offset, *dw;
-
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
-         state_len, state_align, &state_offset);
-
-   dw[0] = dsa->payload[0];
-   dw[1] = dsa->payload[1];
-   dw[2] = dsa->payload[2];
-
-   return state_offset;
-}
-
 void
 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
                     unsigned start_slot,
@@ -3820,64 +1710,6 @@ ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
    }
 }
 
-static uint32_t
-gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
-                       const struct ilo_scissor_state *scissor,
-                       unsigned num_viewports,
-                       struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 2 * num_viewports;
-   uint32_t state_offset, *dw;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
-    *
-    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
-    *      stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
-         state_len, state_align, &state_offset);
-
-   memcpy(dw, scissor->payload, state_len * 4);
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
-                              uint32_t *surface_states,
-                              int num_surface_states,
-                              struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = num_surface_states;
-   uint32_t state_offset, *dw;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
-    *
-    *     "It is stored as an array of up to 256 elements..."
-    */
-   assert(num_surface_states <= 256);
-
-   if (!num_surface_states)
-      return 0;
-
-   dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
-         state_len, state_align, &state_offset);
-   memcpy(dw, surface_states,
-         num_surface_states * sizeof(surface_states[0]));
-
-   return state_offset;
-}
-
 void
 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
                                     unsigned width, unsigned height,
@@ -4236,92 +2068,6 @@ ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
    surf->bo = tex->bo;
 }
 
-static uint32_t
-gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
-                        const struct ilo_view_surface *surf,
-                        bool for_render,
-                        struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
-   uint32_t state_offset;
-   uint32_t read_domains, write_domain;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   if (for_render) {
-      read_domains = INTEL_DOMAIN_RENDER;
-      write_domain = INTEL_DOMAIN_RENDER;
-   }
-   else {
-      read_domains = INTEL_DOMAIN_SAMPLER;
-      write_domain = 0;
-   }
-
-   ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
-
-   STATIC_ASSERT(Elements(surf->payload) >= 8);
-
-   ilo_cp_write(cp, surf->payload[0]);
-   ilo_cp_write_bo(cp, surf->payload[1],
-         surf->bo, read_domains, write_domain);
-   ilo_cp_write(cp, surf->payload[2]);
-   ilo_cp_write(cp, surf->payload[3]);
-   ilo_cp_write(cp, surf->payload[4]);
-   ilo_cp_write(cp, surf->payload[5]);
-
-   if (dev->gen >= ILO_GEN(7)) {
-      ilo_cp_write(cp, surf->payload[6]);
-      ilo_cp_write(cp, surf->payload[7]);
-   }
-
-   ilo_cp_end(cp);
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
-                           const struct pipe_stream_output_target *so,
-                           const struct pipe_stream_output_info *so_info,
-                           int so_index,
-                           struct ilo_cp *cp)
-{
-   struct ilo_buffer *buf = ilo_buffer(so->buffer);
-   unsigned bo_offset, struct_size;
-   enum pipe_format elem_format;
-   struct ilo_view_surface surf;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
-
-   bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
-   struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
-   switch (so_info->output[so_index].num_components) {
-   case 1:
-      elem_format = PIPE_FORMAT_R32_FLOAT;
-      break;
-   case 2:
-      elem_format = PIPE_FORMAT_R32G32_FLOAT;
-      break;
-   case 3:
-      elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
-      break;
-   case 4:
-      elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      break;
-   default:
-      assert(!"unexpected SO components length");
-      elem_format = PIPE_FORMAT_R32_FLOAT;
-      break;
-   }
-
-   ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
-         struct_size, elem_format, false, true, &surf);
-
-   return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
-}
-
 static void
 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
                                const union pipe_color_union *color,
@@ -4680,165 +2426,6 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
    }
 }
 
-static uint32_t
-gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
-                        const struct ilo_sampler_cso * const *samplers,
-                        const struct pipe_sampler_view * const *views,
-                        const uint32_t *sampler_border_colors,
-                        int num_samplers,
-                        struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = 4 * num_samplers;
-   uint32_t state_offset, *dw;
-   int i;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 101:
-    *
-    *     "The sampler state is stored as an array of up to 16 elements..."
-    */
-   assert(num_samplers <= 16);
-
-   if (!num_samplers)
-      return 0;
-
-   dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_samplers; i++) {
-      const struct ilo_sampler_cso *sampler = samplers[i];
-      const struct pipe_sampler_view *view = views[i];
-      const uint32_t border_color = sampler_border_colors[i];
-      uint32_t dw_filter, dw_wrap;
-
-      /* there may be holes */
-      if (!sampler || !view) {
-         /* disabled sampler */
-         dw[0] = 1 << 31;
-         dw[1] = 0;
-         dw[2] = 0;
-         dw[3] = 0;
-         dw += 4;
-
-         continue;
-      }
-
-      /* determine filter and wrap modes */
-      switch (view->texture->target) {
-      case PIPE_TEXTURE_1D:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap_1d;
-         break;
-      case PIPE_TEXTURE_3D:
-         /*
-          * From the Sandy Bridge PRM, volume 4 part 1, page 103:
-          *
-          *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
-          *      surfaces of type SURFTYPE_3D."
-          */
-         dw_filter = sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap;
-         break;
-      case PIPE_TEXTURE_CUBE:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap_cube;
-         break;
-      default:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap;
-         break;
-      }
-
-      dw[0] = sampler->payload[0];
-      dw[1] = sampler->payload[1];
-      assert(!(border_color & 0x1f));
-      dw[2] = border_color;
-      dw[3] = sampler->payload[2];
-
-      dw[0] |= dw_filter;
-
-      if (dev->gen >= ILO_GEN(7)) {
-         dw[3] |= dw_wrap;
-      }
-      else {
-         /*
-          * From the Sandy Bridge PRM, volume 4 part 1, page 21:
-          *
-          *     "[DevSNB] Errata: Incorrect behavior is observed in cases
-          *      where the min and mag mode filters are different and
-          *      SurfMinLOD is nonzero. The determination of MagMode uses the
-          *      following equation instead of the one in the above
-          *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
-          *
-          * As a way to work around that, we set Base to
-          * view->u.tex.first_level.
-          */
-         dw[0] |= view->u.tex.first_level << 22;
-
-         dw[1] |= dw_wrap;
-      }
-
-      dw += 4;
-   }
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
-                                     const struct ilo_sampler_cso *sampler,
-                                     struct ilo_cp *cp)
-{
-   const int state_align = 32 / 4;
-   const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
-   uint32_t state_offset, *dw;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
-         state_len, state_align, &state_offset);
-
-   /* see ilo_gpe_init_sampler_cso() */
-   memcpy(dw, &sampler->payload[3], state_len * 4);
-
-   return state_offset;
-}
-
-static uint32_t
-gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
-                               int size, void **pcb,
-                               struct ilo_cp *cp)
-{
-   /*
-    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
-    * to 32 bytes, and their sizes are specified in 256-bit units.
-    */
-   const int state_align = 32 / 4;
-   const int state_len = align(size, 32) / 4;
-   uint32_t state_offset;
-   char *buf;
-
-   ILO_GPE_VALID_GEN(dev, 6, 7);
-
-   buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
-         state_len, state_align, &state_offset);
-
-   /* zero out the unused range */
-   if (size < state_len * 4)
-      memset(&buf[size], 0, state_len * 4 - size);
-
-   if (pcb)
-      *pcb = buf;
-
-   return state_offset;
-}
-
 static int
 gen6_estimate_command_size(const struct ilo_dev_info *dev,
                            enum ilo_gpe_gen6_command cmd,
index 300176428c19a9a86f7c5270dbeb34a1c24d1b69..7d4bbb59dfe5b2414a95eeef5d1139f9c88d1f0a 100644 (file)
 #ifndef ILO_GPE_GEN6_H
 #define ILO_GPE_GEN6_H
 
+#include "util/u_dual_blend.h"
+#include "util/u_half.h"
+#include "brw_defines.h"
+#include "intel_reg.h"
+
 #include "ilo_common.h"
+#include "ilo_cp.h"
+#include "ilo_format.h"
+#include "ilo_resource.h"
+#include "ilo_shader.h"
 #include "ilo_gpe.h"
 
 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
@@ -533,27 +542,2417 @@ ilo_gpe_gen6_get(void);
 
 /* Below are helpers for other GENs */
 
-int
-ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling);
+/**
+ * Map a depth/stencil pipe format onto the BRW_DEPTHFORMAT_x value used by
+ * the hardware.  Formats that have no entry below yield -1.
+ */
+static inline int
+gen6_translate_depth_format(enum pipe_format format)
+{
+   int hw_format;
+
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D16_UNORM;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      hw_format = BRW_DEPTHFORMAT_D32_FLOAT;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      hw_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+      break;
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      hw_format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      hw_format = BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
+      break;
+   default:
+      /* no mapping listed for this format; callers check for < 0 */
+      hw_format = -1;
+      break;
+   }
+
+   return hw_format;
+}
+
+/**
+ * Convert a winsys tiling mode into the BRW_SURFACE_TILED* bits.
+ *
+ * No tiling maps to 0.  An unrecognized mode trips an assertion and, when
+ * assertions are compiled out, also maps to 0.
+ */
+static inline int
+ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
+{
+   int hw_tiling = 0;
+
+   switch (tiling) {
+   case INTEL_TILING_NONE:
+      break;
+   case INTEL_TILING_X:
+      hw_tiling = BRW_SURFACE_TILED;
+      break;
+   case INTEL_TILING_Y:
+      hw_tiling = BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
+      break;
+   default:
+      assert(!"unknown tiling");
+      break;
+   }
+
+   return hw_tiling;
+}
 
-int
-ilo_gpe_gen6_translate_pipe_prim(unsigned prim);
+/**
+ * Translate a pipe primitive type to the matching hardware primitive type.
+ *
+ * \p prim must be below PIPE_PRIM_MAX and must have an entry in the table
+ * below; both conditions are asserted.  Entries that are not listed are
+ * zero-initialized, which is what the second assertion catches.
+ */
+static inline int
+ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
+{
+   static const int prim_mapping[PIPE_PRIM_MAX] = {
+      [PIPE_PRIM_POINTS]                     = _3DPRIM_POINTLIST,
+      [PIPE_PRIM_LINES]                      = _3DPRIM_LINELIST,
+      [PIPE_PRIM_LINE_LOOP]                  = _3DPRIM_LINELOOP,
+      [PIPE_PRIM_LINE_STRIP]                 = _3DPRIM_LINESTRIP,
+      [PIPE_PRIM_TRIANGLES]                  = _3DPRIM_TRILIST,
+      [PIPE_PRIM_TRIANGLE_STRIP]             = _3DPRIM_TRISTRIP,
+      [PIPE_PRIM_TRIANGLE_FAN]               = _3DPRIM_TRIFAN,
+      [PIPE_PRIM_QUADS]                      = _3DPRIM_QUADLIST,
+      [PIPE_PRIM_QUAD_STRIP]                 = _3DPRIM_QUADSTRIP,
+      [PIPE_PRIM_POLYGON]                    = _3DPRIM_POLYGON,
+      [PIPE_PRIM_LINES_ADJACENCY]            = _3DPRIM_LINELIST_ADJ,
+      [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = _3DPRIM_LINESTRIP_ADJ,
+      [PIPE_PRIM_TRIANGLES_ADJACENCY]        = _3DPRIM_TRILIST_ADJ,
+      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = _3DPRIM_TRISTRIP_ADJ,
+   };
+
+   /* reject out-of-range values before they are used as an index */
+   assert(prim < PIPE_PRIM_MAX);
+   assert(prim_mapping[prim]);
+
+   return prim_mapping[prim];
+}
 
-int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target);
+/**
+ * Pick the hardware surface type (BRW_SURFACE_x) for a pipe texture target.
+ *
+ * Array targets share the surface type of their non-array counterpart, and
+ * RECT is treated as 2D.  An unrecognized target trips an assertion and
+ * otherwise falls back to BRW_SURFACE_BUFFER.
+ */
+static inline int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
+{
+   int surface_type;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      surface_type = BRW_SURFACE_BUFFER;
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      surface_type = BRW_SURFACE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      surface_type = BRW_SURFACE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      surface_type = BRW_SURFACE_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      surface_type = BRW_SURFACE_CUBE;
+      break;
+   default:
+      assert(!"unknown texture target");
+      surface_type = BRW_SURFACE_BUFFER;
+      break;
+   }
+
+   return surface_type;
+}
 
-void
+/**
+ * Fill in DW2 to DW7 of 3DSTATE_SF.
+ *
+ * When \p rasterizer is non-NULL its precomputed SF payload is copied into
+ * \p payload, the MSAA bits are OR-ed in when \p num_samples is above 1,
+ * and, on GEN7 and later, the depth buffer surface format derived from
+ * \p depth_format is OR-ed into the first dword.  When \p rasterizer is
+ * NULL a default six-dword payload is written instead.
+ *
+ * \p payload_len must equal the number of dwords in the rasterizer's SF
+ * payload; this is asserted.
+ */
+static inline void
 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
                                     const struct ilo_rasterizer_state *rasterizer,
                                     int num_samples,
                                     enum pipe_format depth_format,
-                                    uint32_t *payload, unsigned payload_len);
+                                    uint32_t *payload, unsigned payload_len)
+{
+   /* NOTE(review): relies on Elements() not evaluating its operand */
+   assert(payload_len == Elements(rasterizer->sf.payload));
+
+   /*
+    * Test the rasterizer pointer itself: the address of its sf member says
+    * nothing reliable about whether a rasterizer was supplied.
+    */
+   if (rasterizer) {
+      const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+      memcpy(payload, sf->payload, sizeof(sf->payload));
+
+      if (num_samples > 1)
+         payload[1] |= sf->dw_msaa;
+
+      if (dev->gen >= ILO_GEN(7)) {
+         int format;
+
+         /* separate stencil */
+         switch (depth_format) {
+         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+            depth_format = PIPE_FORMAT_Z24X8_UNORM;
+            break;
+         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+            depth_format = PIPE_FORMAT_Z32_FLOAT;
+            break;
+         case PIPE_FORMAT_S8_UINT:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+         default:
+            break;
+         }
+
+         format = gen6_translate_depth_format(depth_format);
+         /* FLOAT surface is assumed when there is no depth buffer */
+         if (format < 0)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
+
+         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
+      }
+   }
+   else {
+      /* no rasterizer state: everything zero except the MSAA pattern bit */
+      payload[0] = 0;
+      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
+      payload[2] = 0;
+      payload[3] = 0;
+      payload[4] = 0;
+      payload[5] = 0;
+   }
+}
 
-void
+/**
+ * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
+ *
+ * Exactly 13 dwords are written to \p dw (asserted via \p num_dwords):
+ * dw[0] holds the output count, URB entry read length/offset, swizzle
+ * enable and point sprite origin; dw[1] to dw[8] receive the routing
+ * swizzles; dw[9] and dw[10] the point sprite and constant interpolation
+ * enables; dw[11] and dw[12] are zero.
+ *
+ * When \p fs is NULL everything is zeroed except a URB entry read length of
+ * 1 in dw[0].  The field shifts differ between GEN6 (GEN6_SF_x) and GEN7
+ * (GEN7_SBE_x).  \p last_sh is not referenced by this function.
+ */
+static inline void
 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
                                  const struct ilo_rasterizer_state *rasterizer,
                                  const struct ilo_shader_state *fs,
                                  const struct ilo_shader_state *last_sh,
-                                 uint32_t *dw, int num_dwords);
+                                 uint32_t *dw, int num_dwords)
+{
+   int output_count, vue_offset, vue_len;
+   const struct ilo_kernel_routing *routing;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(num_dwords == 13);
+
+   /* no fragment shader: minimal payload with a read length of 1 */
+   if (!fs) {
+      memset(dw, 0, sizeof(dw[0]) * num_dwords);
+
+      if (dev->gen >= ILO_GEN(7))
+         dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
+      else
+         dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
+
+      return;
+   }
+
+   /* the number of outputs programmed here is the FS input count */
+   output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+   assert(output_count <= 32);
+
+   routing = ilo_shader_get_kernel_routing(fs);
+
+   /* source_skip must be even; the offset field is programmed as half of it */
+   vue_offset = routing->source_skip;
+   assert(vue_offset % 2 == 0);
+   vue_offset /= 2;
+
+   /* half of source_len, rounded up, and never less than 1 */
+   vue_len = (routing->source_len + 1) / 2;
+   if (!vue_len)
+      vue_len = 1;
+
+   if (dev->gen >= ILO_GEN(7)) {
+      dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+              vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+              vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+      if (routing->swizzle_enable)
+         dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
+   }
+   else {
+      dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
+              vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+              vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+      if (routing->swizzle_enable)
+         dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
+   }
+
+   /* the GEN6_SF_x point sprite bits are used for both generations here */
+   switch (rasterizer->state.sprite_coord_mode) {
+   case PIPE_SPRITE_COORD_UPPER_LEFT:
+      dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
+      break;
+   case PIPE_SPRITE_COORD_LOWER_LEFT:
+      dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+      break;
+   }
+
+   /*
+    * 2 * 16 = 32 bytes, i.e. dw[1] through dw[8].
+    * NOTE(review): presumably 16 swizzles of 2 bytes each -- confirm against
+    * the declaration of ilo_kernel_routing::swizzles.
+    */
+   STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+   memcpy(&dw[1], routing->swizzles, 2 * 16);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+    *
+    *     "This field (Point Sprite Texture Coordinate Enable) must be
+    *      programmed to 0 when non-point primitives are rendered."
+    *
+    * TODO We do not check that yet.
+    */
+   dw[9] = routing->point_sprite_enable;
+
+   dw[10] = routing->const_interp_enable;
+
+   /* WrapShortest enables */
+   dw[11] = 0;
+   dw[12] = 0;
+}
+
+/**
+ * Emit STATE_BASE_ADDRESS, a 10-dword command, to \p cp.
+ *
+ * After the header dword, five base address dwords are written (general
+ * state, surface state, dynamic state, indirect object, instruction), each
+ * through ilo_cp_write_bo() with the value 1.  Four bound dwords follow
+ * (general state, dynamic state, indirect object, instruction).  A non-zero
+ * size is written through ilo_cp_write_bo() as "size | 1"; a zero size
+ * writes a plain constant so that the range check is skipped.
+ *
+ * All four sizes must be multiples of 4096; this is asserted.
+ *
+ * NOTE(review): the low bit set in every address/bound dword is presumably
+ * the "modify enable" bit -- confirm against the PRM.
+ */
+static inline void
+gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
+                             struct intel_bo *general_state_bo,
+                             struct intel_bo *surface_state_bo,
+                             struct intel_bo *dynamic_state_bo,
+                             struct intel_bo *indirect_object_bo,
+                             struct intel_bo *instruction_bo,
+                             uint32_t general_state_size,
+                             uint32_t dynamic_state_size,
+                             uint32_t indirect_object_size,
+                             uint32_t instruction_size,
+                             struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
+   const uint8_t cmd_len = 10;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* 4K-page aligned */
+   assert(((general_state_size | dynamic_state_size |
+            indirect_object_size | instruction_size) & 0xfff) == 0);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+   /* DW1 to DW5: the five base addresses */
+   ilo_cp_write_bo(cp, 1, general_state_bo,
+                       INTEL_DOMAIN_RENDER,
+                       0);
+   ilo_cp_write_bo(cp, 1, surface_state_bo,
+                       INTEL_DOMAIN_SAMPLER,
+                       0);
+   ilo_cp_write_bo(cp, 1, dynamic_state_bo,
+                       INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+                       0);
+   ilo_cp_write_bo(cp, 1, indirect_object_bo,
+                       0,
+                       0);
+   ilo_cp_write_bo(cp, 1, instruction_bo,
+                       INTEL_DOMAIN_INSTRUCTION,
+                       0);
+
+   /* DW6 to DW9: the four bounds, in the order they are written below */
+   if (general_state_size) {
+      ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
+                          INTEL_DOMAIN_RENDER,
+                          0);
+   }
+   else {
+      /* skip range check */
+      ilo_cp_write(cp, 1);
+   }
+
+   if (dynamic_state_size) {
+      ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
+                          INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
+                          0);
+   }
+   else {
+      /* skip range check */
+      ilo_cp_write(cp, 0xfffff000 + 1);
+   }
+
+   if (indirect_object_size) {
+      ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
+                          0,
+                          0);
+   }
+   else {
+      /* skip range check */
+      ilo_cp_write(cp, 0xfffff000 + 1);
+   }
+
+   if (instruction_size) {
+      ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
+                          INTEL_DOMAIN_INSTRUCTION,
+                          0);
+   }
+   else {
+      /* skip range check */
+      ilo_cp_write(cp, 1);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit STATE_SIP, a 2-dword command, to \p cp: the header dword followed by
+ * \p sip.
+ *
+ * The length field (cmd_len - 2) belongs in the header dword, not in the
+ * size passed to ilo_cp_begin().  Since cmd_len is 2 the field is zero, so
+ * the dwords emitted are the same either way.
+ */
+static inline void
+gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
+                    uint32_t sip,
+                    struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
+   const uint8_t cmd_len = 2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, sip);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VF_STATISTICS, a single-dword command, to \p cp.
+ *
+ * \p enable is OR-ed into bit 0 of the command dword.  No length field is
+ * added to the header.
+ */
+static inline void
+gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
+                                bool enable,
+                                struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
+   const uint8_t cmd_len = 1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | enable);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit PIPELINE_SELECT, a single-dword command, to \p cp.
+ *
+ * \p pipeline must be 0x0 or 0x1 (3D or media, per the comment below); it is
+ * OR-ed into the low bits of the command dword.
+ */
+static inline void
+gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
+                          int pipeline,
+                          struct ilo_cp *cp)
+{
+   /* uint32_t, like the command dword of every other emitter in this file */
+   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
+   const uint8_t cmd_len = 1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* 3D or media */
+   assert(pipeline == 0x0 || pipeline == 0x1);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | pipeline);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit MEDIA_VFE_STATE, an 8-dword command, to \p cp.  The generation check
+ * is for GEN6 only.
+ *
+ * DW2 packs (\p max_threads - 1) at bit 16 and \p num_urb_entries at bit 8,
+ * together with the two gateway bits set below.  DW4 packs
+ * \p urb_entry_size at bit 16 with a fixed CURBE allocation size of 480.
+ * All other payload dwords are written as zero.
+ */
+static inline void
+gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
+                          int max_threads, int num_urb_entries,
+                          int urb_entry_size,
+                          struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
+   const uint8_t cmd_len = 8;
+   uint32_t dw2, dw4;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw2 = (max_threads - 1) << 16 |
+         num_urb_entries << 8 |
+         1 << 7 | /* Reset Gateway Timer */
+         1 << 6;  /* Bypass Gateway Control */
+
+   dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
+         480;                    /* CURBE Allocation Size */
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, 0); /* scoreboard */
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword MEDIA_CURBE_LOAD command.
+ *
+ * "buf" must be a multiple of 32; "size" is rounded up with align(size, 32)
+ * before being written.
+ */
+static inline void
+gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
+                          uint32_t buf, int size,
+                          struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x01);
+   const uint8_t num_dwords = 4;
+   int aligned_size;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(buf % 32 == 0);
+
+   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
+   aligned_size = align(size, 32);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, 0);                /* MBZ */
+   ilo_cp_write(cp, aligned_size);
+   ilo_cp_write(cp, buf);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword MEDIA_INTERFACE_DESCRIPTOR_LOAD command.
+ *
+ * "offset" must be a multiple of 32.  The length dword is num_ids * 8 * 4.
+ */
+static inline void
+gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
+                                          uint32_t offset, int num_ids,
+                                          struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x02);
+   const uint8_t num_dwords = 4;
+   int total_len;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   assert(offset % 32 == 0);
+
+   /* every ID has 8 DWords */
+   total_len = num_ids * 8 * 4;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, 0);                /* MBZ */
+   ilo_cp_write(cp, total_len);
+   ilo_cp_write(cp, offset);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the two-dword MEDIA_GATEWAY_STATE command.  The payload dword packs
+ * "id" at bit 16, "byte" at bit 8 and "thread_count" in the low bits.
+ */
+static inline void
+gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
+                              int id, int byte, int thread_count,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x03);
+   const uint8_t num_dwords = 2;
+   uint32_t packed;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   packed = id << 16;
+   packed |= byte << 8;
+   packed |= thread_count;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, packed);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the two-dword MEDIA_STATE_FLUSH command.  The payload dword packs
+ * "thread_count_water_mark" at bit 16 and "barrier_mask" in the low bits.
+ */
+static inline void
+gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
+                            int thread_count_water_mark,
+                            int barrier_mask,
+                            struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x04);
+   const uint8_t num_dwords = 2;
+   uint32_t packed;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   packed = thread_count_water_mark << 16;
+   packed |= barrier_mask;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, packed);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; the function
+ * only trips an assertion to flag that the command is unsupported.
+ */
+static inline void
+gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
+                              struct ilo_cp *cp)
+{
+   /* neither argument is used */
+   (void) dev;
+   (void) cp;
+
+   assert(!"MEDIA_OBJECT_WALKER unsupported");
+}
+
+/**
+ * Emit the four-dword 3DSTATE_BINDING_TABLE_POINTERS command.  The VS, GS
+ * and PS modify flags are always set in the header, followed by the three
+ * binding table values in that order.
+ */
+static inline void
+gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_binding_table,
+                                         uint32_t gs_binding_table,
+                                         uint32_t ps_binding_table,
+                                         struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x01);
+   const uint8_t num_dwords = 4;
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = opcode | (num_dwords - 2);
+   dw0 |= GEN6_BINDING_TABLE_MODIFY_VS;
+   dw0 |= GEN6_BINDING_TABLE_MODIFY_GS;
+   dw0 |= GEN6_BINDING_TABLE_MODIFY_PS;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, vs_binding_table);
+   ilo_cp_write(cp, gs_binding_table);
+   ilo_cp_write(cp, ps_binding_table);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword 3DSTATE_SAMPLER_STATE_POINTERS command.  The VS, GS
+ * and PS change flags are always set in the header, followed by the three
+ * sampler state values in that order.
+ */
+static inline void
+gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t vs_sampler_state,
+                                         uint32_t gs_sampler_state,
+                                         uint32_t ps_sampler_state,
+                                         struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x02);
+   const uint8_t num_dwords = 4;
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = opcode | (num_dwords - 2);
+   dw0 |= VS_SAMPLER_STATE_CHANGE;
+   dw0 |= GS_SAMPLER_STATE_CHANGE;
+   dw0 |= PS_SAMPLER_STATE_CHANGE;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, vs_sampler_state);
+   ilo_cp_write(cp, gs_sampler_state);
+   ilo_cp_write(cp, ps_sampler_state);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the three-dword 3DSTATE_URB command.
+ *
+ * Entry sizes are converted to 128-byte rows (at least one row each), and
+ * the entry counts are derived from the total sizes, rounded down to a
+ * multiple of 4 and capped at 256.
+ */
+static inline void
+gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
+                      int vs_total_size, int gs_total_size,
+                      int vs_entry_size, int gs_entry_size,
+                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x05);
+   const uint8_t num_dwords = 3;
+   const int bytes_per_row = 128; /* 1024 bits */
+   int vs_rows, gs_rows;
+   int vs_entries, gs_entries;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /* entry sizes in 1024-bit URB rows, rounded up */
+   vs_rows = (vs_entry_size + bytes_per_row - 1) / bytes_per_row;
+   gs_rows = (gs_entry_size + bytes_per_row - 1) / bytes_per_row;
+
+   /* the valid range is [1, 5] */
+   if (vs_rows == 0)
+      vs_rows = 1;
+   if (gs_rows == 0)
+      gs_rows = 1;
+   assert(vs_rows <= 5 && gs_rows <= 5);
+
+   /* the valid range is [24, 256] in multiples of 4 */
+   vs_entries = (vs_total_size / bytes_per_row / vs_rows) & ~3;
+   vs_entries = (vs_entries > 256) ? 256 : vs_entries;
+   assert(vs_entries >= 24);
+
+   /* the valid range is [0, 256] in multiples of 4 */
+   gs_entries = (gs_total_size / bytes_per_row / gs_rows) & ~3;
+   gs_entries = (gs_entries > 256) ? 256 : gs_entries;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, (vs_rows - 1) << GEN6_URB_VS_SIZE_SHIFT |
+                    vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+   ilo_cp_write(cp, gs_entries << GEN6_URB_GS_ENTRIES_SHIFT |
+                    (gs_rows - 1) << GEN6_URB_GS_SIZE_SHIFT);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VERTEX_BUFFERS with one four-dword entry per hardware vertex
+ * buffer in "ve".  Nothing is emitted when ve->vb_count is zero.
+ *
+ * Each hardware slot is looked up in "vb" through ve->vb_mapping.  A slot
+ * with no buffer, or with a stride above 2048, is emitted as a null vertex
+ * buffer.
+ */
+static inline void
+gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
+                                 const struct ilo_ve_state *ve,
+                                 const struct ilo_vb_state *vb,
+                                 struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x08);
+   uint8_t num_dwords;
+   unsigned slot;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 82:
+    *
+    *     "From 1 to 33 VBs can be specified..."
+    */
+   assert(ve->vb_count <= 33);
+
+   if (!ve->vb_count)
+      return;
+
+   /* one header dword plus four dwords per buffer */
+   num_dwords = 1 + 4 * ve->vb_count;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+
+   for (slot = 0; slot < ve->vb_count; slot++) {
+      const unsigned divisor = ve->instance_divisors[slot];
+      const unsigned src_idx = ve->vb_mapping[slot];
+      const struct pipe_vertex_buffer *vbuf = &vb->states[src_idx];
+      uint32_t dw0 = slot << GEN6_VB0_INDEX_SHIFT;
+
+      dw0 |= (divisor) ? GEN6_VB0_ACCESS_INSTANCEDATA :
+                         GEN6_VB0_ACCESS_VERTEXDATA;
+
+      if (dev->gen >= ILO_GEN(7))
+         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+      /* use null vb if there is no buffer or the stride is out of range */
+      if (!vbuf->buffer || vbuf->stride > 2048) {
+         dw0 |= 1 << 13;
+
+         ilo_cp_write(cp, dw0);
+         ilo_cp_write(cp, 0);
+         ilo_cp_write(cp, 0);
+      }
+      else {
+         const struct ilo_buffer *res = ilo_buffer(vbuf->buffer);
+         const uint32_t first_byte = vbuf->buffer_offset;
+         /*
+          * As noted in ilo_translate_format(), some 3-component formats are
+          * treated as 4-component formats to work around hardware
+          * limitations.  With a single PIPE_FORMAT_R16G16B16_FLOAT vertex
+          * and a bo_size of 6, the hardware could not fetch the vertex: it
+          * expects a PIPE_FORMAT_R16G16B16A16_FLOAT vertex, which needs at
+          * least 8 bytes.
+          *
+          * To make the workaround effective, the end offset is derived from
+          * the physical, page-aligned size so that the last vertex has a
+          * better chance to be fetched.
+          */
+         const uint32_t last_byte = intel_bo_get_size(res->bo) - 1;
+
+         dw0 |= vbuf->stride << BRW_VB0_PITCH_SHIFT;
+
+         ilo_cp_write(cp, dw0);
+         ilo_cp_write_bo(cp, first_byte, res->bo, INTEL_DOMAIN_VERTEX, 0);
+         ilo_cp_write_bo(cp, last_byte, res->bo, INTEL_DOMAIN_VERTEX, 0);
+      }
+
+      /* the last dword of every entry is the instance divisor */
+      ilo_cp_write(cp, divisor);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Initialize "cso" as a valid vertex element whose second payload dword
+ * holds the four given component controls.
+ */
+static inline void
+ve_init_cso_with_components(const struct ilo_dev_info *dev,
+                            int comp0, int comp1, int comp2, int comp3,
+                            struct ilo_ve_cso *cso)
+{
+   uint32_t controls;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   controls = comp0 << BRW_VE1_COMPONENT_0_SHIFT;
+   controls |= comp1 << BRW_VE1_COMPONENT_1_SHIFT;
+   controls |= comp2 << BRW_VE1_COMPONENT_2_SHIFT;
+   controls |= comp3 << BRW_VE1_COMPONENT_3_SHIFT;
+
+   cso->payload[0] = GEN6_VE0_VALID;
+   cso->payload[1] = controls;
+}
+
+/**
+ * Turn "cso" into an edge flag vertex element: set the edge flag enable
+ * bit, store only component 0, and rewrite an R32_FLOAT source format as
+ * R32_UINT.
+ */
+static inline void
+ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
+                    struct ilo_ve_cso *cso)
+{
+   int src_format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
+    *        valid VERTEX_ELEMENT structure.
+    *
+    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+    *
+    *      - The Source Element Format must be set to the UINT format.
+    *
+    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
+    *        primitives.  Software may elect to convert QUADLIST primitives
+    *        to some set of corresponding edge-flag-supported primitive
+    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+    */
+
+   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
+   cso->payload[1] =
+         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
+         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
+
+   /*
+    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
+    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
+    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+    *
+    * Since all the hardware cares about is whether the flags are zero or not,
+    * the latter case can be treated as BRW_SURFACEFORMAT_R32_UINT.
+    */
+   src_format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
+   if (src_format != BRW_SURFACEFORMAT_R32_FLOAT) {
+      assert(src_format == BRW_SURFACEFORMAT_R8_UINT);
+      return;
+   }
+
+   /* R32_UINT is exactly one below R32_FLOAT, so decrement the format field */
+   STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
+         BRW_SURFACEFORMAT_R32_FLOAT - 1);
+
+   cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
+}
+
+/**
+ * Emit 3DSTATE_VERTEX_ELEMENTS with two dwords per vertex element.
+ *
+ * When there is no element at all, a single dummy element storing
+ * (0, 0, 0, 1.0f) is emitted.  When "prepend_generated_ids" is set, an extra
+ * element storing the vertex and instance IDs is emitted first.  When
+ * "last_velement_edgeflag" is set, the last element of "ve" is emitted as an
+ * edge flag element; "ve" itself is not modified.
+ */
+static inline void
+gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
+                                  const struct ilo_ve_state *ve,
+                                  bool last_velement_edgeflag,
+                                  bool prepend_generated_ids,
+                                  struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
+   uint8_t cmd_len;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
+    *
+    *     "Up to 34 (DevSNB+) vertex elements are supported."
+    */
+   assert(ve->count + prepend_generated_ids <= 34);
+
+   if (!ve->count && !prepend_generated_ids) {
+      struct ilo_ve_cso dummy;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_0,
+            BRW_VE1_COMPONENT_STORE_1_FLT,
+            &dummy);
+
+      cmd_len = 3;
+      ilo_cp_begin(cp, cmd_len);
+      ilo_cp_write(cp, cmd | (cmd_len - 2));
+      ilo_cp_write_multi(cp, dummy.payload, 2);
+      ilo_cp_end(cp);
+
+      return;
+   }
+
+   /* one header dword plus two dwords per element */
+   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+
+   if (prepend_generated_ids) {
+      struct ilo_ve_cso gen_ids;
+
+      ve_init_cso_with_components(dev,
+            BRW_VE1_COMPONENT_STORE_VID,
+            BRW_VE1_COMPONENT_STORE_IID,
+            BRW_VE1_COMPONENT_NOSTORE,
+            BRW_VE1_COMPONENT_NOSTORE,
+            &gen_ids);
+
+      ilo_cp_write_multi(cp, gen_ids.payload, 2);
+   }
+
+   /*
+    * The edge flag path needs at least one element.  With ve->count == 0
+    * (reachable when only the generated IDs are emitted), "ve->count - 1"
+    * would wrap around in the unsigned loop comparison and run far past the
+    * end of ve->cso.
+    */
+   if (last_velement_edgeflag && ve->count) {
+      struct ilo_ve_cso edgeflag;
+
+      for (i = 0; i < ve->count - 1; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+
+      /* patch a copy so that the state object in ve stays untouched */
+      edgeflag = ve->cso[i];
+      ve_set_cso_edgeflag(dev, &edgeflag);
+      ilo_cp_write_multi(cp, edgeflag.payload, 2);
+   }
+   else {
+      for (i = 0; i < ve->count; i++)
+         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Map an index size in bytes (1, 2 or 4) to the hardware index format.
+ * Any other size trips an assertion and falls back to BRW_INDEX_BYTE.
+ */
+static inline int
+gen6_translate_index_size(int size)
+{
+   if (size == 4)
+      return BRW_INDEX_DWORD;
+   if (size == 2)
+      return BRW_INDEX_WORD;
+   if (size == 1)
+      return BRW_INDEX_BYTE;
+
+   assert(!"unknown index size");
+   return BRW_INDEX_BYTE;
+}
+
+/**
+ * Emit the three-dword 3DSTATE_INDEX_BUFFER command for ib->hw_resource.
+ * Nothing is emitted when there is no index buffer resource.
+ *
+ * The buffer always starts at offset 0; the end offset is the last byte of
+ * the last whole index that fits in buf->bo_size.
+ */
+static inline void
+gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
+                               const struct ilo_ib_state *ib,
+                               bool enable_cut_index,
+                               struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0a);
+   const uint8_t num_dwords = 3;
+   struct ilo_buffer *res = ilo_buffer(ib->hw_resource);
+   uint32_t first_byte, last_byte, dw0;
+   int hw_format;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (!res)
+      return;
+
+   hw_format = gen6_translate_index_size(ib->hw_index_size);
+
+   /*
+    * the start offset stays 0 here; pipe_draw_info::start is adjusted with
+    * ib->draw_start_offset in 3DPRIMITIVE instead
+    */
+   first_byte = 0;
+
+   /* the end offset must also be aligned and is inclusive */
+   last_byte = res->bo_size;
+   last_byte -= (last_byte % ib->hw_index_size);
+   last_byte -= 1;
+
+   dw0 = opcode | (num_dwords - 2);
+   if (enable_cut_index)
+      dw0 |= BRW_CUT_INDEX_ENABLE;
+   dw0 |= hw_format << 8;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write_bo(cp, first_byte, res->bo, INTEL_DOMAIN_VERTEX, 0);
+   ilo_cp_write_bo(cp, last_byte, res->bo, INTEL_DOMAIN_VERTEX, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword 3DSTATE_VIEWPORT_STATE_POINTERS command.  The CLIP,
+ * SF and CC modify flags are always set in the header, followed by the
+ * three viewport values in that order.
+ */
+static inline void
+gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                          uint32_t clip_viewport,
+                                          uint32_t sf_viewport,
+                                          uint32_t cc_viewport,
+                                          struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0d);
+   const uint8_t num_dwords = 4;
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = opcode | (num_dwords - 2);
+   dw0 |= GEN6_CLIP_VIEWPORT_MODIFY;
+   dw0 |= GEN6_SF_VIEWPORT_MODIFY;
+   dw0 |= GEN6_CC_VIEWPORT_MODIFY;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, clip_viewport);
+   ilo_cp_write(cp, sf_viewport);
+   ilo_cp_write(cp, cc_viewport);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword 3DSTATE_CC_STATE_POINTERS command.  Each of the three
+ * state values is written with its lowest bit set.
+ */
+static inline void
+gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t blend_state,
+                                    uint32_t depth_stencil_state,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0e);
+   const uint8_t num_dwords = 4;
+   /* NOTE(review): bit 0 is presumably the per-pointer modify enable */
+   const uint32_t low_bit = 1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, blend_state | low_bit);
+   ilo_cp_write(cp, depth_stencil_state | low_bit);
+   ilo_cp_write(cp, color_calc_state | low_bit);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the two-dword 3DSTATE_SCISSOR_STATE_POINTERS command: the header
+ * followed by "scissor_rect".
+ */
+static inline void
+gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                         uint32_t scissor_rect,
+                                         struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 2;
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0f);
+   uint32_t dw0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw0 = opcode | (num_dwords - 2);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, scissor_rect);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the six-dword 3DSTATE_VS command.
+ *
+ * Without a vertex shader, all payload dwords are zero.  Otherwise the
+ * payload comes from the kernel offset and the kernel CSO of "vs", with the
+ * sampler count (in units of four samplers, rounded up) merged into dword 2.
+ */
+static inline void
+gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *vs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x10);
+   const uint8_t num_dwords = 6;
+   const struct ilo_shader_cso *kernel_cso;
+   uint32_t sampler_dw, urb_dw, dispatch_dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (!vs) {
+      int i;
+
+      ilo_cp_begin(cp, num_dwords);
+      ilo_cp_write(cp, opcode | (num_dwords - 2));
+      for (i = 0; i < 5; i++)
+         ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+
+      return;
+   }
+
+   kernel_cso = ilo_shader_get_kernel_cso(vs);
+   sampler_dw = kernel_cso->payload[0];
+   urb_dw = kernel_cso->payload[1];
+   dispatch_dw = kernel_cso->payload[2];
+
+   /* the sampler count is expressed in groups of four */
+   sampler_dw |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
+   ilo_cp_write(cp, sampler_dw);
+   ilo_cp_write(cp, 0);                /* scratch */
+   ilo_cp_write(cp, urb_dw);
+   ilo_cp_write(cp, dispatch_dw);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the seven-dword 3DSTATE_GS command.
+ *
+ * Three cases are handled: a real geometry shader "gs"; no GS but a vertex
+ * shader reporting ILO_KERNEL_VS_GEN6_SO, in which case the kernel offset
+ * is taken from "vs" plus a per-primitive offset chosen by
+ * "verts_per_prim" and the payload comes from a freshly initialized GS CSO;
+ * or neither, in which case a mostly zero payload is emitted.
+ */
+static inline void
+gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *gs,
+                     const struct ilo_shader_state *vs,
+                     int verts_per_prim,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x11);
+   const uint8_t num_dwords = 7;
+   uint32_t kernel, p0, p1, p2, p3;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   if (gs) {
+      const struct ilo_shader_cso *gs_cso;
+
+      kernel = ilo_shader_get_kernel_offset(gs);
+
+      gs_cso = ilo_shader_get_kernel_cso(gs);
+      p0 = gs_cso->payload[0];
+      p1 = gs_cso->payload[1];
+      p2 = gs_cso->payload[2];
+      p3 = gs_cso->payload[3];
+   }
+   else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+      struct ilo_shader_cso so_cso;
+      enum ilo_kernel_param offset_param;
+
+      /* pick the kernel entry point matching the primitive size */
+      if (verts_per_prim == 1)
+         offset_param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+      else if (verts_per_prim == 2)
+         offset_param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+      else
+         offset_param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+
+      kernel = ilo_shader_get_kernel_offset(vs) +
+         ilo_shader_get_kernel_param(vs, offset_param);
+
+      /* cannot use VS's CSO */
+      ilo_gpe_init_gs_cso_gen6(dev, vs, &so_cso);
+      p0 = so_cso.payload[0];
+      p1 = so_cso.payload[1];
+      p2 = so_cso.payload[2];
+      p3 = so_cso.payload[3];
+   }
+   else {
+      kernel = 0;
+      p0 = 0;
+      p1 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
+      p2 = GEN6_GS_STATISTICS_ENABLE;
+      p3 = 0;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, kernel);
+   ilo_cp_write(cp, p0);
+   ilo_cp_write(cp, 0);
+   ilo_cp_write(cp, p1);
+   ilo_cp_write(cp, p2);
+   ilo_cp_write(cp, p3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword 3DSTATE_CLIP command.
+ *
+ * Without a rasterizer state, all payload dwords are zero.  Otherwise the
+ * payload starts from rasterizer->clip.payload and is extended with the
+ * guardband test (when both the caller and the rasterizer state allow it),
+ * the non-perspective barycentric enable (when "fs" reports any
+ * non-perspective interpolation mode) and num_viewports - 1.
+ */
+static inline void
+gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
+                       const struct ilo_rasterizer_state *rasterizer,
+                       const struct ilo_shader_state *fs,
+                       bool enable_guardband,
+                       int num_viewports,
+                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
+   const uint8_t cmd_len = 4;
+   uint32_t dw1, dw2, dw3;
+
+   /*
+    * Validate the generation like the other emitters do.
+    * NOTE(review): the 6..7 range assumes this emitter is shared with GEN7;
+    * confirm against the callers.
+    */
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (rasterizer) {
+      int interps;
+
+      dw1 = rasterizer->clip.payload[0];
+      dw2 = rasterizer->clip.payload[1];
+      dw3 = rasterizer->clip.payload[2];
+
+      if (enable_guardband && rasterizer->clip.can_enable_guardband)
+         dw2 |= GEN6_CLIP_GB_TEST;
+
+      /* bitmask of the barycentric interpolation modes used by the FS */
+      interps = (fs) ?  ilo_shader_get_kernel_param(fs,
+            ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
+
+      if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
+                     1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
+                     1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
+         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
+
+      dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
+             (num_viewports - 1);
+   }
+   else {
+      dw1 = 0;
+      dw2 = 0;
+      dw3 = 0;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, dw3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the twenty-dword 3DSTATE_SF command.
+ *
+ * The payload is assembled from two helper-filled arrays: the first SBE
+ * dword, then the six raster dwords, then the remaining twelve SBE dwords.
+ */
+static inline void
+gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_shader_state *last_sh,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x13);
+   const uint8_t num_dwords = 20;
+   uint32_t raster_dw[6];
+   uint32_t sbe_dw[13];
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
+         1, PIPE_FORMAT_NONE, raster_dw, Elements(raster_dw));
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, sbe_dw, Elements(sbe_dw));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* the raster dwords sit between the first and the remaining SBE dwords */
+   ilo_cp_write(cp, sbe_dw[0]);
+   ilo_cp_write_multi(cp, raster_dw, 6);
+   ilo_cp_write_multi(cp, sbe_dw + 1, 12);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the nine-dword 3DSTATE_WM command.
+ *
+ * Without a fragment shader, only the maximum thread count is written and
+ * every other payload dword is zero.  Otherwise the payload starts from the
+ * kernel CSO of "fs" and is combined with the sampler count, the statistics
+ * enable bit, the kill/dual-source-blend flags and the rasterizer's WM
+ * payload.  The sample count is fixed to 1 here, so the MSAA dwords of the
+ * rasterizer state are never merged in.
+ */
+static inline void
+gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     int num_samplers,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     bool dual_blend, bool cc_may_kill,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x14);
+   const uint8_t num_dwords = 9;
+   const int sample_count = 1;
+   const struct ilo_shader_cso *kernel_cso;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   if (!fs) {
+      /* see brwCreateContext() */
+      int max_threads = 40;
+      int i;
+
+      if (dev->gt == 2)
+         max_threads = 80;
+
+      ilo_cp_begin(cp, num_dwords);
+      ilo_cp_write(cp, opcode | (num_dwords - 2));
+      for (i = 0; i < 4; i++)
+         ilo_cp_write(cp, 0);
+      /* honor the valid range even if dispatching is disabled */
+      ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+      for (i = 0; i < 3; i++)
+         ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+
+      return;
+   }
+
+   kernel_cso = ilo_shader_get_kernel_cso(fs);
+   dw2 = kernel_cso->payload[0];
+   dw4 = kernel_cso->payload[1];
+   dw5 = kernel_cso->payload[2];
+   dw6 = kernel_cso->payload[3];
+
+   /* the sampler count is expressed in groups of four */
+   dw2 |= ((num_samplers + 3) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+   /* the else branch is kept as a record of the mutually exclusive bits */
+   if (true) {
+      dw4 |= GEN6_WM_STATISTICS_ENABLE;
+   }
+   else {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+       *
+       *     "This bit (Statistics Enable) must be disabled if either of these
+       *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
+       *      Resolve Enable or Depth Buffer Resolve Enable."
+       */
+      dw4 |= GEN6_WM_DEPTH_CLEAR;
+      dw4 |= GEN6_WM_DEPTH_RESOLVE;
+      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+   }
+
+   if (cc_may_kill)
+      dw5 |= GEN6_WM_KILL_ENABLE | GEN6_WM_DISPATCH_ENABLE;
+
+   if (dual_blend)
+      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
+
+   dw5 |= rasterizer->wm.payload[0];
+   dw6 |= rasterizer->wm.payload[1];
+
+   if (sample_count > 1) {
+      dw6 |= rasterizer->wm.dw_msaa_rast |
+             rasterizer->wm.dw_msaa_disp;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0);                /* scratch */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_write(cp, dw6);
+   ilo_cp_write(cp, 0);                /* kernel 1 */
+   ilo_cp_write(cp, 0);                /* kernel 2 */
+   ilo_cp_end(cp);
+}
+
+/**
+ * Fill the four buffer dwords of a 3DSTATE_CONSTANT_x command.
+ *
+ * For each of the first "num_bufs" buffers with a non-zero size, the dword
+ * is the buffer value OR'ed with its read length (size in 32-byte units,
+ * rounded up, minus one).  All other dwords are zeroed.
+ *
+ * \return a bitmask with bit i set for every enabled buffer i
+ */
+static inline unsigned
+gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs, int max_read_length,
+                           uint32_t *dw, int num_dwords)
+{
+   unsigned enable_mask = 0x0;
+   int read_sum = 0;
+   int slot;
+
+   assert(num_dwords == 4);
+
+   for (slot = 0; slot < 4; slot++) {
+      int len_minus_one;
+
+      /* unused or empty slots are disabled */
+      if (slot >= num_bufs || !sizes[slot]) {
+         dw[slot] = 0;
+         continue;
+      }
+
+      /* in 256-bit units minus one */
+      len_minus_one = (sizes[slot] + 31) / 32 - 1;
+
+      assert(bufs[slot] % 32 == 0);
+      assert(len_minus_one < 32);
+
+      enable_mask |= 1 << slot;
+      dw[slot] = bufs[slot] | len_minus_one;
+
+      read_sum += len_minus_one + 1;
+   }
+
+   assert(read_sum <= max_read_length);
+
+   return enable_mask;
+}
+
+/**
+ * Emit the five-dword 3DSTATE_CONSTANT_VS command for up to four constant
+ * buffers.  The enable mask of the buffers goes into the header at bit 12.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
+   const uint8_t num_dwords = 5;
+   uint32_t payload[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 32"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 32, payload, Elements(payload));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2) | enable_mask << 12);
+   for (i = 0; i < 4; i++)
+      ilo_cp_write(cp, payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the five-dword 3DSTATE_CONSTANT_GS command for up to four constant
+ * buffers.  The enable mask of the buffers goes into the header at bit 12.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
+   const uint8_t num_dwords = 5;
+   uint32_t payload[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, payload, Elements(payload));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2) | enable_mask << 12);
+   for (i = 0; i < 4; i++)
+      ilo_cp_write(cp, payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the five-dword 3DSTATE_CONSTANT_PS command for up to four constant
+ * buffers.  The enable mask of the buffers goes into the header at bit 12.
+ */
+static inline void
+gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
+   const uint8_t num_dwords = 5;
+   uint32_t payload[4], enable_mask;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(num_bufs <= 4);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
+    *
+    *     "The sum of all four read length fields (each incremented to
+    *      represent the actual read length) must be less than or equal to 64"
+    */
+   enable_mask = gen6_fill_3dstate_constant(dev,
+         bufs, sizes, num_bufs, 64, payload, Elements(payload));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2) | enable_mask << 12);
+   for (i = 0; i < 4; i++)
+      ilo_cp_write(cp, payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the two-dword 3DSTATE_SAMPLE_MASK command.  Only the low four bits
+ * of "sample_mask" are written.
+ */
+static inline void
+gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x18);
+   const uint8_t num_dwords = 2;
+   const unsigned usable_bits = 0xf;
+   unsigned masked;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   masked = sample_mask & usable_bits;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, masked);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the four-dword 3DSTATE_DRAWING_RECTANGLE command.
+ *
+ * The rectangle is given by its origin and size; the inclusive maximum
+ * corner is computed from them.  All four coordinates are clamped to the
+ * generation's limit (16383 on GEN7 and later, 8191 otherwise).  The origin
+ * dword is always zero.
+ */
+static inline void
+gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
+                                    unsigned x, unsigned y,
+                                    unsigned width, unsigned height,
+                                    struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x1, 0x00);
+   const uint8_t num_dwords = 4;
+   unsigned x_last, y_last;
+   int coord_max;
+
+   /* inclusive maximum corner */
+   x_last = x + width - 1;
+   y_last = y + height - 1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (dev->gen >= ILO_GEN(7)) {
+      coord_max = 16383;
+   }
+   else {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
+       *
+       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
+       *      must be an even number"
+       */
+      assert(y % 2 == 0);
+
+      coord_max = 8191;
+   }
+
+   if (x > coord_max)
+      x = coord_max;
+   if (y > coord_max)
+      y = coord_max;
+   if (x_last > coord_max)
+      x_last = coord_max;
+   if (y_last > coord_max)
+      y_last = coord_max;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, y << 16 | x);
+   ilo_cp_write(cp, y_last << 16 | x_last);
+
+   /*
+    * The origin does not need to be set.  It is intended to support front
+    * buffer rendering.
+    */
+   ilo_cp_write(cp, 0);
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the seven-dword 3DSTATE_DEPTH_BUFFER command from the six payload
+ * dwords of "zs".  The second payload dword is written as an offset into
+ * zs->bo; the opcode differs between GEN7 and earlier generations.
+ */
+static inline void
+gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                               const struct ilo_zs_surface *zs,
+                               struct ilo_cp *cp)
+{
+   const uint8_t num_dwords = 7;
+   uint32_t opcode;
+   int i;
+
+   if (dev->gen >= ILO_GEN(7))
+      opcode = ILO_GPE_CMD(0x3, 0x0, 0x05);
+   else
+      opcode = ILO_GPE_CMD(0x3, 0x1, 0x05);
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, zs->payload[0]);
+   ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   for (i = 2; i <= 5; i++)
+      ilo_cp_write(cp, zs->payload[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit the two-dword 3DSTATE_POLY_STIPPLE_OFFSET command.  Both offsets
+ * must be within [0, 31]; "x_offset" is placed at bit 8 and "y_offset" in
+ * the low bits.
+ */
+static inline void
+gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
+                                      int x_offset, int y_offset,
+                                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x1, 0x06);
+   const uint8_t num_dwords = 2;
+   uint32_t packed;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert(x_offset >= 0 && x_offset <= 31);
+   assert(y_offset >= 0 && y_offset <= 31);
+
+   packed = x_offset << 8;
+   packed |= y_offset;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, packed);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_POLY_STIPPLE_PATTERN.
+ *
+ * The command is a header dword followed by the 32 dwords of
+ * pattern->stipple, written in array order.
+ */
+static inline void
+gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
+                                       const struct pipe_poly_stipple *pattern,
+                                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
+   const uint8_t cmd_len = 33;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * The array size is a property of the type, not of the data: verify it
+    * at build time so that a mismatch cannot slip through release builds.
+    */
+   STATIC_ASSERT(Elements(pattern->stipple) == 32);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   for (i = 0; i < 32; i++)
+      ilo_cp_write(cp, pattern->stipple[i]);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_LINE_STIPPLE.
+ *
+ * \p pattern must fit in 16 bits and \p factor must be in [1, 256].  The
+ * last dword carries \p factor together with its fixed-point inverse, whose
+ * format and bit position depend on the GEN.
+ */
+static inline void
+gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
+                               unsigned pattern, unsigned factor,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
+   const uint8_t cmd_len = 3;
+   unsigned inverse;
+   uint32_t dw2;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+   assert((pattern & 0xffff) == pattern);
+   assert(factor >= 1 && factor <= 256);
+
+   if (dev->gen >= ILO_GEN(7)) {
+      /* the inverse is in U1.16 */
+      inverse = (unsigned) (65536.0f / factor);
+      dw2 = inverse << 15 | factor;
+   }
+   else {
+      /* the inverse is in U1.13 */
+      inverse = (unsigned) (8192.0f / factor);
+      dw2 = inverse << 16 | factor;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, pattern);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_AA_LINE_PARAMETERS with both parameter dwords set to zero.
+ */
+static inline void
+gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
+                                     struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
+   const uint8_t cmd_len = 3;
+   /* each dword packs two fields, at bit 16 and bit 0; all of them are 0 */
+   const uint32_t dw1 = 0 << 16 | 0;
+   const uint32_t dw2 = 0 << 16 | 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_GS_SVB_INDEX (GEN6 only).
+ *
+ * \p index selects one of the four SVB indices and must be in [0, 3].
+ * \p svbi and \p max_svbi are written verbatim; \p load_vertex_count sets
+ * SVB_LOAD_INTERNAL_VERTEX_COUNT in the first dword.
+ */
+static inline void
+gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
+                               int index, unsigned svbi,
+                               unsigned max_svbi,
+                               bool load_vertex_count,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
+   const uint8_t cmd_len = 4;
+   uint32_t dw1;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+   assert(0 <= index && index <= 3);
+
+   dw1 = index << SVB_INDEX_SHIFT;
+   dw1 |= (load_vertex_count) ? SVB_LOAD_INTERNAL_VERTEX_COUNT : 0;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, svbi);
+   ilo_cp_write(cp, max_svbi);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_MULTISAMPLE.
+ *
+ * The command is 3 dwords on GEN6 and 4 dwords on GEN7.  Supported sample
+ * counts are 0/1 (single-sampled), 4, and 8 (asserted to be GEN7+).  Any
+ * other count trips an assertion and falls back to single-sampled.
+ *
+ * \p packed_sample_pos is read only for 4x (first dword) and 8x (first two
+ * dwords).
+ */
+static inline void
+gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
+                              int num_samples,
+                              const uint32_t *packed_sample_pos,
+                              bool pixel_location_center,
+                              struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
+   const bool has_dw3 = (dev->gen >= ILO_GEN(7));
+   const uint8_t cmd_len = (has_dw3) ? 4 : 3;
+   /* the sample positions stay zero unless a multisampled mode sets them */
+   uint32_t dw1, dw2 = 0, dw3 = 0;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (pixel_location_center)
+      dw1 = MS_PIXEL_LOCATION_CENTER;
+   else
+      dw1 = MS_PIXEL_LOCATION_UPPER_LEFT;
+
+   switch (num_samples) {
+   case 0:
+   case 1:
+      dw1 |= MS_NUMSAMPLES_1;
+      break;
+   case 4:
+      dw1 |= MS_NUMSAMPLES_4;
+      dw2 = packed_sample_pos[0];
+      break;
+   case 8:
+      assert(dev->gen >= ILO_GEN(7));
+      dw1 |= MS_NUMSAMPLES_8;
+      dw2 = packed_sample_pos[0];
+      dw3 = packed_sample_pos[1];
+      break;
+   default:
+      assert(!"unsupported sample count");
+      dw1 |= MS_NUMSAMPLES_1;
+      break;
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   ilo_cp_write(cp, dw2);
+   if (has_dw3)
+      ilo_cp_write(cp, dw3);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_STENCIL_BUFFER.
+ *
+ * The two dwords after the header come from zs->payload[6] and
+ * zs->payload[7]; the latter is written with ilo_cp_write_bo() against
+ * zs->separate_s8_bo.
+ */
+static inline void
+gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
+                                 const struct ilo_zs_surface *zs,
+                                 struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t cmd;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* the command is encoded with different ILO_GPE_CMD arguments on GEN7 */
+   if (dev->gen >= ILO_GEN(7))
+      cmd = ILO_GPE_CMD(0x3, 0x0, 0x06);
+   else
+      cmd = ILO_GPE_CMD(0x3, 0x1, 0x0e);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   /* the payload layout is set up by ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[6]);
+   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_HIER_DEPTH_BUFFER.
+ *
+ * The two dwords after the header come from zs->payload[8] and
+ * zs->payload[9]; the latter is written with ilo_cp_write_bo() against
+ * zs->hiz_bo.
+ */
+static inline void
+gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
+                                    const struct ilo_zs_surface *zs,
+                                    struct ilo_cp *cp)
+{
+   const uint8_t cmd_len = 3;
+   uint32_t cmd;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /* the command is encoded with different ILO_GPE_CMD arguments on GEN7 */
+   if (dev->gen >= ILO_GEN(7))
+      cmd = ILO_GPE_CMD(0x3, 0x0, 0x07);
+   else
+      cmd = ILO_GPE_CMD(0x3, 0x1, 0x0f);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   /* the payload layout is set up by ilo_gpe_init_zs_surface() */
+   ilo_cp_write(cp, zs->payload[8]);
+   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
+         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CLEAR_PARAMS (GEN6 only).
+ *
+ * GEN5_DEPTH_CLEAR_VALID is set in the header dword and \p clear_val is
+ * written verbatim as the second dword.
+ */
+static inline void
+gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+                               uint32_t clear_val,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
+   const uint8_t cmd_len = 2;
+   const uint32_t dw0 = cmd | (cmd_len - 2) | GEN5_DEPTH_CLEAR_VALID;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, clear_val);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit PIPE_CONTROL.
+ *
+ * \p dw1 holds the control flags and is written verbatim after being
+ * checked against the PRM restrictions quoted below.  \p bo_offset is
+ * written with ilo_cp_write_bo() against \p bo, and is followed by one zero
+ * dword, or two when \p write_qword is set.
+ */
+static inline void
+gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
+                       uint32_t dw1,
+                       struct intel_bo *bo, uint32_t bo_offset,
+                       bool write_qword,
+                       struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
+   uint8_t cmd_len;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   if (write_qword)
+      cmd_len = 5;
+   else
+      cmd_len = 4;
+
+   if (dw1 & PIPE_CONTROL_CS_STALL) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+       *
+       *     "1 of the following must also be set (when CS stall is set):
+       *
+       *       * Depth Cache Flush Enable ([0] of DW1)
+       *       * Stall at Pixel Scoreboard ([1] of DW1)
+       *       * Depth Stall ([13] of DW1)
+       *       * Post-Sync Operation ([13] of DW1)
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Notify Enable ([8] of DW1)"
+       *
+       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
+       *
+       *     "One of the following must also be set (when CS stall is set):
+       *
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Depth Cache Flush Enable ([0] of DW1)
+       *       * Stall at Pixel Scoreboard ([1] of DW1)
+       *       * Depth Stall ([13] of DW1)
+       *       * Post-Sync Operation ([13] of DW1)"
+       */
+      const uint32_t post_sync_ops = PIPE_CONTROL_WRITE_IMMEDIATE |
+                                     PIPE_CONTROL_WRITE_DEPTH_COUNT |
+                                     PIPE_CONTROL_WRITE_TIMESTAMP;
+      uint32_t companions = PIPE_CONTROL_WRITE_FLUSH |
+                            PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                            PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                            PIPE_CONTROL_DEPTH_STALL |
+                            post_sync_ops;
+
+      /* Notify Enable is listed only in the Sandy Bridge PRM */
+      if (dev->gen == ILO_GEN(6))
+         companions |= PIPE_CONTROL_INTERRUPT_ENABLE;
+
+      assert(dw1 & companions);
+   }
+
+   if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+       *
+       *     "Following bits must be clear (when Depth Stall is set):
+       *
+       *       * Render Target Cache Flush Enable ([12] of DW1)
+       *       * Depth Cache Flush Enable ([0] of DW1)"
+       */
+      assert((dw1 & (PIPE_CONTROL_WRITE_FLUSH |
+                     PIPE_CONTROL_DEPTH_CACHE_FLUSH)) == 0);
+   }
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, dw1);
+   /* the instruction domain is used for both reads and writes */
+   ilo_cp_write_bo(cp, bo_offset, bo,
+         INTEL_DOMAIN_INSTRUCTION, INTEL_DOMAIN_INSTRUCTION);
+   ilo_cp_write(cp, 0);
+   if (write_qword)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DPRIMITIVE (GEN6 only).
+ *
+ * The topology is _3DPRIM_RECTLIST when \p rectlist is set, and the
+ * translation of info->mode otherwise.  For indexed draws the vertex buffer
+ * is accessed randomly and ib->draw_start_offset is added to info->start;
+ * \p ib is not read for non-indexed draws.
+ */
+static inline void
+gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
+   const uint8_t cmd_len = 6;
+   int prim, vb_access;
+   uint32_t vb_start, dw0;
+
+   if (rectlist)
+      prim = _3DPRIM_RECTLIST;
+   else
+      prim = ilo_gpe_gen6_translate_pipe_prim(info->mode);
+
+   if (info->indexed)
+      vb_access = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+   else
+      vb_access = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+
+   vb_start = info->start +
+      ((info->indexed) ? ib->draw_start_offset : 0);
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   dw0 = cmd | (cmd_len - 2) |
+         prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+         vb_access;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, dw0);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, vb_start);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Write \p num_ids INTERFACE_DESCRIPTOR_DATA elements (GEN6 only) and
+ * return the offset reported by ilo_cp_steal_ptr().
+ *
+ * All array parameters are indexed by descriptor and must have at least
+ * \p num_ids elements.  Each descriptor is 8 dwords.
+ */
+static inline uint32_t
+gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
+                                    const struct ilo_shader_state **cs,
+                                    uint32_t *sampler_state,
+                                    int *num_samplers,
+                                    uint32_t *binding_table_state,
+                                    int *num_surfaces,
+                                    int num_ids,
+                                    struct ilo_cp *cp)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
+    *
+    *     "(Interface Descriptor Total Length) This field must have the same
+    *      alignment as the Interface Descriptor Data Start Address.
+    *
+    *      It must be DQWord (32-byte) aligned..."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
+    *
+    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
+    *      aligned address of the Interface Descriptor data."
+    */
+   const int desc_len = 32 / 4;
+   const int state_align = 32 / 4;
+   const int state_len = desc_len * num_ids;
+   uint32_t state_offset, *base;
+   int id;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
+         state_len, state_align, &state_offset);
+
+   for (id = 0; id < num_ids; id++) {
+      uint32_t *desc = base + 8 * id;
+      /* the sampler count is programmed in units of four samplers */
+      const int sampler_count = (num_samplers[id] + 3) / 4;
+
+      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
+      desc[1] = 1 << 18; /* SPF */
+      desc[2] = sampler_state[id] | (sampler_count << 2);
+      desc[3] = binding_table_state[id] | num_surfaces[id];
+      /* CURBE Read Length (at bit 16) and CURBE Read Offset are both 0 */
+      desc[4] = 0 << 16 | 0;
+      desc[5] = 0; /* Barrier ID */
+      desc[6] = 0;
+      desc[7] = 0;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports SF_VIEWPORT elements (GEN6 only) and return the
+ * offset reported by ilo_cp_steal_ptr().
+ *
+ * Each element is 8 dwords: the bit patterns of m00, m11, m22, m30, m31 and
+ * m32 of the viewport, followed by two zero dwords.
+ */
+static inline uint32_t
+gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 8 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
+    *
+    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+      uint32_t *elem = base + 8 * i;
+
+      elem[0] = fui(vp->m00);
+      elem[1] = fui(vp->m11);
+      elem[2] = fui(vp->m22);
+      elem[3] = fui(vp->m30);
+      elem[4] = fui(vp->m31);
+      elem[5] = fui(vp->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports CLIP_VIEWPORT elements (GEN6 only) and return the
+ * offset reported by ilo_cp_steal_ptr().
+ *
+ * Each element is 4 dwords: the bit patterns of min_gbx, max_gbx, min_gby
+ * and max_gby of the viewport.
+ */
+static inline uint32_t
+gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                        const struct ilo_viewport_cso *viewports,
+                        unsigned num_viewports,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 4 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
+    *
+    *     "The viewport-related state is stored as an array of up to 16
+    *      elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+      uint32_t *elem = base + 4 * i;
+
+      elem[0] = fui(vp->min_gbx);
+      elem[1] = fui(vp->max_gbx);
+      elem[2] = fui(vp->min_gby);
+      elem[3] = fui(vp->max_gby);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports CC_VIEWPORT elements and return the offset
+ * reported by ilo_cp_steal_ptr().
+ *
+ * Each element is 2 dwords: the bit patterns of min_z and max_z of the
+ * viewport.
+ */
+static inline uint32_t
+gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
+                      const struct ilo_viewport_cso *viewports,
+                      unsigned num_viewports,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   uint32_t state_offset, *base;
+   unsigned i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
+    *
+    *     "The viewport state is stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   base = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_viewports; i++) {
+      const struct ilo_viewport_cso *vp = viewports + i;
+      uint32_t *elem = base + 2 * i;
+
+      elem[0] = fui(vp->min_z);
+      elem[1] = fui(vp->max_z);
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write COLOR_CALC_STATE and return the offset reported by
+ * ilo_cp_steal_ptr().
+ *
+ * The state is 6 dwords: the two stencil reference values together with
+ * the UNORM8 alpha-test format, the alpha reference converted with
+ * float_to_ubyte(), and the bit patterns of the four blend color
+ * components.
+ */
+static inline uint32_t
+gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
+                           const struct pipe_stencil_ref *stencil_ref,
+                           float alpha_ref,
+                           const struct pipe_blend_color *blend_color,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 6;
+   uint32_t state_offset, *dw;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
+         state_len, state_align, &state_offset);
+
+   /*
+    * Widen the reference values before shifting.  A narrow unsigned value
+    * is promoted to int, and shifting a value of 128 or more left by 24
+    * would overflow it.
+    */
+   dw[0] = (uint32_t) stencil_ref->ref_value[0] << 24 |
+           (uint32_t) stencil_ref->ref_value[1] << 16 |
+           BRW_ALPHATEST_FORMAT_UNORM8;
+   dw[1] = float_to_ubyte(alpha_ref);
+   dw[2] = fui(blend_color->color[0]);
+   dw[3] = fui(blend_color->color[1]);
+   dw[4] = fui(blend_color->color[2]);
+   dw[5] = fui(blend_color->color[3]);
+
+   return state_offset;
+}
+
+/**
+ * Translate a pipe DSA test function to the matching hardware compare
+ * function.
+ *
+ * An unknown \p func trips an assertion and translates to
+ * BRW_COMPAREFUNCTION_NEVER.
+ *
+ * This is defined in a header, so it is marked inline like the emit
+ * functions around it; a plain static function would be reported as unused
+ * in every file that includes the header without calling it.
+ */
+static inline int
+gen6_translate_dsa_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:      return BRW_COMPAREFUNCTION_NEVER;
+   case PIPE_FUNC_LESS:       return BRW_COMPAREFUNCTION_LESS;
+   case PIPE_FUNC_EQUAL:      return BRW_COMPAREFUNCTION_EQUAL;
+   case PIPE_FUNC_LEQUAL:     return BRW_COMPAREFUNCTION_LEQUAL;
+   case PIPE_FUNC_GREATER:    return BRW_COMPAREFUNCTION_GREATER;
+   case PIPE_FUNC_NOTEQUAL:   return BRW_COMPAREFUNCTION_NOTEQUAL;
+   case PIPE_FUNC_GEQUAL:     return BRW_COMPAREFUNCTION_GEQUAL;
+   case PIPE_FUNC_ALWAYS:     return BRW_COMPAREFUNCTION_ALWAYS;
+   default:
+      assert(!"unknown depth/stencil/alpha test function");
+      return BRW_COMPAREFUNCTION_NEVER;
+   }
+}
+
+/**
+ * Write BLEND_STATE and return the offset reported by ilo_cp_steal_ptr().
+ *
+ * One 2-dword element is written per color buffer of \p fb.  When there is
+ * no color buffer, a single element is still written if alpha test is
+ * enabled, so that the alpha function can be referenced; otherwise nothing
+ * is written and 0 is returned.
+ *
+ * The blend CSO comes from blend->cso[i] when independent blending is
+ * enabled and from blend->cso[0] otherwise.  The format-dependent bits
+ * (blending, logic op, alpha test) are always decided from the format of
+ * render target i itself.  A target that is out of range or unbound is
+ * treated as a non-integer UNORM target.
+ */
+static inline uint32_t
+gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
+                      const struct ilo_blend_state *blend,
+                      const struct ilo_fb_state *fb,
+                      const struct pipe_alpha_state *alpha,
+                      struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   int state_len;
+   uint32_t state_offset, *dw;
+   unsigned num_targets, i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
+    *
+    *     "The blend state is stored as an array of up to 8 elements..."
+    */
+   num_targets = fb->state.nr_cbufs;
+   assert(num_targets <= 8);
+
+   if (!num_targets) {
+      if (!alpha->enabled)
+         return 0;
+      /* to be able to reference alpha func */
+      num_targets = 1;
+   }
+
+   state_len = 2 * num_targets;
+
+   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_targets; i++) {
+      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
+      const struct ilo_blend_cso *cso = &blend->cso[idx];
+      const int num_samples = fb->num_samples;
+      /*
+       * Look up the surface by the target index, not by the CSO index:
+       * sharing cso[0] among all targets must not make them share the
+       * format of the first target too.  The slot may also be unbound.
+       */
+      const struct pipe_surface *cbuf =
+         (i < fb->state.nr_cbufs) ? fb->state.cbufs[i] : NULL;
+      const struct util_format_description *format_desc =
+         (cbuf) ? util_format_description(cbuf->format) : NULL;
+      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
+
+      rt_is_unorm = true;
+      rt_is_pure_integer = false;
+      rt_dst_alpha_forced_one = false;
+
+      if (format_desc) {
+         int ch;
+
+         switch (format_desc->format) {
+         case PIPE_FORMAT_B8G8R8X8_UNORM:
+            /* force alpha to one when the HW format has alpha */
+            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
+                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
+            rt_dst_alpha_forced_one = true;
+            break;
+         default:
+            break;
+         }
+
+         for (ch = 0; ch < 4; ch++) {
+            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
+               continue;
+
+            if (format_desc->channel[ch].pure_integer) {
+               rt_is_unorm = false;
+               rt_is_pure_integer = true;
+               break;
+            }
+
+            if (!format_desc->channel[ch].normalized ||
+                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
+               rt_is_unorm = false;
+         }
+      }
+
+      dw[0] = cso->payload[0];
+      dw[1] = cso->payload[1];
+
+      /* the blend bits are left out for pure integer targets */
+      if (!rt_is_pure_integer) {
+         if (rt_dst_alpha_forced_one)
+            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
+         else
+            dw[0] |= cso->dw_blend;
+      }
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+       *
+       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
+       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
+       *
+       * Since logicop is ignored for non-UNORM color buffers, no special care
+       * is needed.
+       */
+      if (rt_is_unorm)
+         dw[1] |= cso->dw_logicop;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+       *
+       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
+       *      Dither both must be disabled."
+       *
+       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
+       * requires that anyway.
+       */
+      if (num_samples > 1)
+         dw[1] |= cso->dw_alpha_mod;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+       *
+       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+       *      alpha value."
+       */
+      if (alpha->enabled && !rt_is_pure_integer) {
+         dw[1] |= 1 << 16 |
+                  gen6_translate_dsa_func(alpha->func) << 13;
+      }
+
+      dw += 2;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write DEPTH_STENCIL_STATE and return the offset reported by
+ * ilo_cp_steal_ptr().
+ *
+ * The state is the first three dwords of dsa->payload, copied verbatim.
+ */
+static inline uint32_t
+gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
+                              const struct ilo_dsa_state *dsa,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 3;
+   uint32_t state_offset, *dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < state_len; i++)
+      dw[i] = dsa->payload[i];
+
+   return state_offset;
+}
+
+/**
+ * Write \p num_viewports SCISSOR_RECT elements and return the offset
+ * reported by ilo_cp_steal_ptr().
+ *
+ * Each element is 2 dwords and is copied verbatim from scissor->payload.
+ */
+static inline uint32_t
+gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
+                       const struct ilo_scissor_state *scissor,
+                       unsigned num_viewports,
+                       struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 2 * num_viewports;
+   /* the state length is in dwords; the copy is in bytes */
+   const int state_size = state_len * 4;
+   uint32_t state_offset;
+   uint32_t *dst;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
+    *
+    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
+    *      stored as an array of up to 16 elements..."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   dst = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
+         state_len, state_align, &state_offset);
+
+   memcpy(dst, scissor->payload, state_size);
+
+   return state_offset;
+}
+
+/**
+ * Write BINDING_TABLE_STATE and return the offset reported by
+ * ilo_cp_steal_ptr().
+ *
+ * The table is a verbatim copy of the first \p num_surface_states entries
+ * of \p surface_states.  Nothing is written, and 0 is returned, when the
+ * table is empty.
+ */
+static inline uint32_t
+gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
+                              uint32_t *surface_states,
+                              int num_surface_states,
+                              struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = num_surface_states;
+   uint32_t state_offset = 0;
+   uint32_t *table;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "It is stored as an array of up to 256 elements..."
+    */
+   assert(num_surface_states <= 256);
+
+   if (num_surface_states) {
+      table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
+            state_len, state_align, &state_offset);
+      memcpy(table, surface_states,
+            num_surface_states * sizeof(surface_states[0]));
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write SURFACE_STATE from the pre-packed payload of \p surf and return
+ * the offset reported by ilo_cp_steal().
+ *
+ * The state is 6 dwords on GEN6 and 8 dwords on GEN7.  payload[1] is
+ * written with ilo_cp_write_bo() against surf->bo.  The render domain is
+ * used for both reads and writes when \p for_render is set; otherwise the
+ * sampler domain is used for reads and there is no write domain.
+ */
+static inline uint32_t
+gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
+                        const struct ilo_view_surface *surf,
+                        bool for_render,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
+   const uint32_t read_domains =
+      (for_render) ? INTEL_DOMAIN_RENDER : INTEL_DOMAIN_SAMPLER;
+   const uint32_t write_domain =
+      (for_render) ? INTEL_DOMAIN_RENDER : 0;
+   uint32_t state_offset;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
+
+   STATIC_ASSERT(Elements(surf->payload) >= 8);
+
+   ilo_cp_write(cp, surf->payload[0]);
+   ilo_cp_write_bo(cp, surf->payload[1],
+         surf->bo, read_domains, write_domain);
+   /* the remaining dwords: payload[2..5] on GEN6, payload[2..7] on GEN7 */
+   for (i = 2; i < state_len; i++)
+      ilo_cp_write(cp, surf->payload[i]);
+
+   ilo_cp_end(cp);
+
+   return state_offset;
+}
+
+/**
+ * Write the SURFACE_STATE for stream output \p so_index of \p so_info
+ * (GEN6 only) and return its offset.
+ *
+ * A buffer view surface is initialized over \p so, starting at the output's
+ * destination offset and using a 32-bit float format with as many channels
+ * as the output has components.  It is then emitted with
+ * gen6_emit_SURFACE_STATE() as a non-render surface.
+ */
+static inline uint32_t
+gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
+                           const struct pipe_stream_output_target *so,
+                           const struct pipe_stream_output_info *so_info,
+                           int so_index,
+                           struct ilo_cp *cp)
+{
+   struct ilo_buffer *buf = ilo_buffer(so->buffer);
+   struct ilo_view_surface surf;
+   enum pipe_format elem_format;
+   unsigned bo_offset, struct_size;
+
+   ILO_GPE_VALID_GEN(dev, 6, 6);
+
+   /* dst_offset and stride are multiplied by 4 to be passed on */
+   bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+   struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
+
+   switch (so_info->output[so_index].num_components) {
+   case 4:
+      elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      break;
+   case 3:
+      elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
+      break;
+   case 2:
+      elem_format = PIPE_FORMAT_R32G32_FLOAT;
+      break;
+   case 1:
+      elem_format = PIPE_FORMAT_R32_FLOAT;
+      break;
+   default:
+      assert(!"unexpected SO components length");
+      elem_format = PIPE_FORMAT_R32_FLOAT;
+      break;
+   }
+
+   ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
+         struct_size, elem_format, false, true, &surf);
+
+   return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
+}
+
+/**
+ * Write \p num_samplers SAMPLER_STATE elements and return the offset
+ * reported by ilo_cp_steal_ptr().
+ *
+ * \p samplers, \p views and \p sampler_border_colors are parallel arrays
+ * of at least \p num_samplers entries.  A slot whose sampler or view is
+ * NULL is written as a disabled sampler.  Nothing is written, and 0 is
+ * returned, when \p num_samplers is 0.
+ *
+ * The filter and wrap dwords are picked from the sampler CSO according to
+ * the target of the bound texture.
+ */
+static inline uint32_t
+gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
+                        const struct ilo_sampler_cso * const *samplers,
+                        const struct pipe_sampler_view * const *views,
+                        const uint32_t *sampler_border_colors,
+                        int num_samplers,
+                        struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   const int state_len = 4 * num_samplers;
+   uint32_t state_offset, *dw;
+   int i;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 101:
+    *
+    *     "The sampler state is stored as an array of up to 16 elements..."
+    */
+   assert(num_samplers <= 16);
+
+   if (!num_samplers)
+      return 0;
+
+   dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
+         state_len, state_align, &state_offset);
+
+   for (i = 0; i < num_samplers; i++) {
+      const struct ilo_sampler_cso *sampler = samplers[i];
+      const struct pipe_sampler_view *view = views[i];
+      const uint32_t border_color = sampler_border_colors[i];
+      uint32_t dw_filter, dw_wrap;
+
+      /* there may be holes */
+      if (!sampler || !view) {
+         /*
+          * disabled sampler; the constant is unsigned because shifting a
+          * signed 1 into bit 31 overflows int
+          */
+         dw[0] = 1u << 31;
+         dw[1] = 0;
+         dw[2] = 0;
+         dw[3] = 0;
+         dw += 4;
+
+         continue;
+      }
+
+      /* determine filter and wrap modes */
+      switch (view->texture->target) {
+      case PIPE_TEXTURE_1D:
+         dw_filter = (sampler->anisotropic) ?
+            sampler->dw_filter_aniso : sampler->dw_filter;
+         dw_wrap = sampler->dw_wrap_1d;
+         break;
+      case PIPE_TEXTURE_3D:
+         /*
+          * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+          *
+          *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+          *      surfaces of type SURFTYPE_3D."
+          */
+         dw_filter = sampler->dw_filter;
+         dw_wrap = sampler->dw_wrap;
+         break;
+      case PIPE_TEXTURE_CUBE:
+         dw_filter = (sampler->anisotropic) ?
+            sampler->dw_filter_aniso : sampler->dw_filter;
+         dw_wrap = sampler->dw_wrap_cube;
+         break;
+      default:
+         dw_filter = (sampler->anisotropic) ?
+            sampler->dw_filter_aniso : sampler->dw_filter;
+         dw_wrap = sampler->dw_wrap;
+         break;
+      }
+
+      dw[0] = sampler->payload[0];
+      dw[1] = sampler->payload[1];
+      /* the low five bits of the border color dword must be clear */
+      assert(!(border_color & 0x1f));
+      dw[2] = border_color;
+      dw[3] = sampler->payload[2];
+
+      dw[0] |= dw_filter;
+
+      /* the wrap modes go into DW3 on GEN7 and into DW1 on GEN6 */
+      if (dev->gen >= ILO_GEN(7)) {
+         dw[3] |= dw_wrap;
+      }
+      else {
+         /*
+          * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+          *
+          *     "[DevSNB] Errata: Incorrect behavior is observed in cases
+          *      where the min and mag mode filters are different and
+          *      SurfMinLOD is nonzero. The determination of MagMode uses the
+          *      following equation instead of the one in the above
+          *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
+          *
+          * As a way to work around that, we set Base to
+          * view->u.tex.first_level.
+          */
+         dw[0] |= view->u.tex.first_level << 22;
+
+         dw[1] |= dw_wrap;
+      }
+
+      dw += 4;
+   }
+
+   return state_offset;
+}
+
+/**
+ * Write SAMPLER_BORDER_COLOR_STATE and return the offset reported by
+ * ilo_cp_steal_ptr().
+ *
+ * The state is 12 dwords on GEN6 and 4 dwords on GEN7, copied verbatim
+ * from sampler->payload starting at index 3.
+ */
+static inline uint32_t
+gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
+                                     const struct ilo_sampler_cso *sampler,
+                                     struct ilo_cp *cp)
+{
+   const int state_align = 32 / 4;
+   int state_len;
+   uint32_t state_offset;
+   uint32_t *dst;
+
+   if (dev->gen >= ILO_GEN(7))
+      state_len = 4;
+   else
+      state_len = 12;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   dst = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
+         state_len, state_align, &state_offset);
+
+   /* the payload layout is set up by ilo_gpe_init_sampler_cso() */
+   memcpy(dst, &sampler->payload[3], state_len * 4);
+
+   return state_offset;
+}
+
+/**
+ * Reserve a push constant buffer of \p size bytes and return the offset
+ * reported by ilo_cp_steal_ptr().
+ *
+ * The reservation is made for \p size aligned with align(size, 32); the
+ * bytes past \p size are zeroed, while the first \p size bytes are left for
+ * the caller to fill.  When \p pcb is not NULL, it receives the pointer to
+ * the start of the buffer.
+ */
+static inline uint32_t
+gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
+                               int size, void **pcb,
+                               struct ilo_cp *cp)
+{
+   /*
+    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
+    * to 32 bytes, and their sizes are specified in 256-bit units.
+    */
+   const int state_align = 32 / 4;
+   const int state_len = align(size, 32) / 4;
+   const int state_size = state_len * 4;
+   uint32_t state_offset;
+   char *start;
+
+   ILO_GPE_VALID_GEN(dev, 6, 7);
+
+   start = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
+         state_len, state_align, &state_offset);
+
+   /* clear the padding that follows the requested range */
+   if (state_size > size)
+      memset(start + size, 0, state_size - size);
+
+   if (pcb)
+      *pcb = start;
+
+   return state_offset;
+}
 
 #endif /* ILO_GPE_GEN6_H */
index 2a590be2ddcf01fa25ab008bf7aa944ebdbf5832..0af7eea0cb2008e20ea05d97d42f954571a02a11 100644 (file)
 #include "ilo_shader.h"
 #include "ilo_gpe_gen7.h"
 
-static void
-gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
-                       struct ilo_cp *cp)
-{
-   assert(!"GPGPU_WALKER unsupported");
-}
-
-static void
-gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
-                               uint32_t clear_val,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
-   const uint8_t cmd_len = 3;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, clear_val);
-   ilo_cp_write(cp, 1);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
-                          int subop, uint32_t pointer,
-                          struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 2;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, pointer);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                    uint32_t color_calc_state,
-                                    struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
-}
-
 void
 ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
                          const struct ilo_shader_state *gs,
@@ -125,74 +77,6 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
    cso->payload[2] = dw5;
 }
 
-static void
-gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *gs,
-                     int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
-   const uint8_t cmd_len = 7;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!gs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   cso = ilo_shader_get_kernel_cso(gs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
-   dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     const struct pipe_surface *zs_surf,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
-   const uint8_t cmd_len = 7;
-   const int num_samples = 1;
-   uint32_t payload[6];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
-         rasterizer, num_samples,
-         (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
-         payload, Elements(payload));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, payload, 6);
-   ilo_cp_end(cp);
-}
-
 void
 ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
                                 const struct pipe_rasterizer_state *state,
@@ -335,934 +219,6 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
    cso->payload[3] = wm_dw1;
 }
 
-static void
-gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *fs,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     bool cc_may_kill,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
-   const uint8_t cmd_len = 3;
-   const int num_samples = 1;
-   uint32_t dw1, dw2;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* see ilo_gpe_init_rasterizer_wm() */
-   dw1 = rasterizer->wm.payload[0];
-   dw2 = rasterizer->wm.payload[1];
-
-   dw1 |= GEN7_WM_STATISTICS_ENABLE;
-
-   if (false) {
-      dw1 |= GEN7_WM_DEPTH_CLEAR;
-      dw1 |= GEN7_WM_DEPTH_RESOLVE;
-      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
-   }
-
-   if (fs) {
-      const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
-
-      dw1 |= fs_cso->payload[3];
-   }
-
-   if (cc_may_kill) {
-      dw1 |= GEN7_WM_DISPATCH_ENABLE |
-             GEN7_WM_KILL_ENABLE;
-   }
-
-   if (num_samples > 1) {
-      dw1 |= rasterizer->wm.dw_msaa_rast;
-      dw2 |= rasterizer->wm.dw_msaa_disp;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
-                           int subop,
-                           const uint32_t *bufs, const int *sizes,
-                           int num_bufs,
-                           struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 7;
-   uint32_t dw[6];
-   int total_read_length, i;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
-
-   assert(num_bufs <= 4);
-
-   dw[0] = 0;
-   dw[1] = 0;
-
-   total_read_length = 0;
-   for (i = 0; i < 4; i++) {
-      int read_len;
-
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
-       *
-       *     "Constant buffers must be enabled in order from Constant Buffer 0
-       *      to Constant Buffer 3 within this command.  For example, it is
-       *      not allowed to enable Constant Buffer 1 by programming a
-       *      non-zero value in the VS Constant Buffer 1 Read Length without a
-       *      non-zero value in VS Constant Buffer 0 Read Length."
-       */
-      if (i >= num_bufs || !sizes[i]) {
-         for (; i < 4; i++) {
-            assert(i >= num_bufs || !sizes[i]);
-            dw[2 + i] = 0;
-         }
-         break;
-      }
-
-      /* read lengths are in 256-bit units */
-      read_len = (sizes[i] + 31) / 32;
-      /* the lower 5 bits are used for memory object control state */
-      assert(bufs[i] % 32 == 0);
-
-      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
-      dw[2 + i] = bufs[i];
-
-      total_read_length += read_len;
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
-    *
-    *     "The sum of all four read length fields must be less than or equal
-    *      to the size of 64"
-    */
-   assert(total_read_length <= 64);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, dw, 6);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
-                              unsigned sample_mask,
-                              int num_samples,
-                              struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
-   const uint8_t cmd_len = 2;
-   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
-    *
-    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
-    *      (Sample Mask) must be zero.
-    *
-    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
-    *      must be zero."
-    */
-   sample_mask &= valid_mask;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, sample_mask);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
-}
-
-static void
-gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *hs,
-                     int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
-   const uint8_t cmd_len = 7;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   assert(!hs);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
-   const uint8_t cmd_len = 4;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *ds,
-                     int num_samplers,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
-   const uint8_t cmd_len = 6;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   assert(!ds);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
-
-}
-
-static void
-gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
-                            unsigned buffer_mask,
-                            int vertex_attrib_count,
-                            bool rasterizer_discard,
-                            struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
-   const uint8_t cmd_len = 3;
-   const bool enable = (buffer_mask != 0);
-   uint32_t dw1, dw2;
-   int read_len;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!enable) {
-      dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
-      if (rasterizer_discard)
-         dw1 |= SO_RENDERING_DISABLE;
-
-      dw2 = 0;
-
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, dw1);
-      ilo_cp_write(cp, dw2);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   read_len = (vertex_attrib_count + 1) / 2;
-   if (!read_len)
-      read_len = 1;
-
-   dw1 = SO_FUNCTION_ENABLE |
-         0 << SO_RENDER_STREAM_SELECT_SHIFT |
-         SO_STATISTICS_ENABLE |
-         buffer_mask << 8;
-
-   if (rasterizer_discard)
-      dw1 |= SO_RENDERING_DISABLE;
-
-   /* API_OPENGL */
-   if (true)
-      dw1 |= SO_REORDER_TRAILING;
-
-   dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
-         0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
-         0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
-         (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
-                      const struct ilo_rasterizer_state *rasterizer,
-                      const struct ilo_shader_state *fs,
-                      const struct ilo_shader_state *last_sh,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
-   const uint8_t cmd_len = 14;
-   uint32_t dw[13];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
-         fs, last_sh, dw, Elements(dw));
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write_multi(cp, dw, 13);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *fs,
-                     int num_samplers, bool dual_blend,
-                     struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
-   const uint8_t cmd_len = 8;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!fs) {
-      /* see brwCreateContext() */
-      const int max_threads = (dev->gt == 2) ? 172 : 48;
-
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      /* GPU hangs if none of the dispatch enable bits is set */
-      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
-                       GEN7_PS_8_DISPATCH_ENABLE);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-
-      return;
-   }
-
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
-   dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
-
-   if (dual_blend)
-      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, 0); /* kernel 1 */
-   ilo_cp_write(cp, 0); /* kernel 2 */
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
-                                                  uint32_t sf_clip_viewport,
-                                                  struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
-                                             uint32_t cc_viewport,
-                                             struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                       uint32_t blend_state,
-                                       struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                               uint32_t depth_stencil_state,
-                                               struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
-                                            uint32_t binding_table,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
-                                            uint32_t sampler_state,
-                                            struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
-}
-
-static void
-gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
-                      int subop, int offset, int size,
-                      int entry_size,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
-   const uint8_t cmd_len = 2;
-   const int row_size = 64; /* 512 bits */
-   int alloc_size, num_entries, min_entries, max_entries;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, and GS variants */
-   assert(subop >= 0x30 && subop <= 0x33);
-
-   /* in multiples of 8KB */
-   assert(offset % 8192 == 0);
-   offset /= 8192;
-
-   /* in multiple of 512-bit rows */
-   alloc_size = (entry_size + row_size - 1) / row_size;
-   if (!alloc_size)
-      alloc_size = 1;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
-    *
-    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
-    *      cause performance to decrease due to banking in the URB. Element
-    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
-    */
-   if (subop == 0x30 && alloc_size == 5)
-      alloc_size = 6;
-
-   /* in multiples of 8 */
-   num_entries = (size / row_size / alloc_size) & ~7;
-
-   switch (subop) {
-   case 0x30: /* 3DSTATE_URB_VS */
-      min_entries = 32;
-      max_entries = (dev->gt == 2) ? 704 : 512;
-
-      assert(num_entries >= min_entries);
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case 0x31: /* 3DSTATE_URB_HS */
-      max_entries = (dev->gt == 2) ? 64 : 32;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case 0x32: /* 3DSTATE_URB_DS */
-      if (num_entries)
-         assert(num_entries >= 138);
-      break;
-   case 0x33: /* 3DSTATE_URB_GS */
-      max_entries = (dev->gt == 2) ? 320 : 192;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   default:
-      break;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
-                    (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
-                    num_entries);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
-                         int offset, int size, int entry_size,
-                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
-}
-
-static void
-gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
-                                      int subop, int offset, int size,
-                                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
-   const uint8_t cmd_len = 2;
-   int end;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= 0x12 && subop <= 0x16);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
-    *
-    *     "(A table that says the maximum size of each constant buffer is
-    *      16KB")
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
-    *
-    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
-    *      may not exceed the maximum value of the Constant Buffer Size."
-    *
-    * Thus, the valid range of buffer end is [0KB, 16KB].
-    */
-   end = (offset + size) / 1024;
-   if (end > 16) {
-      assert(!"invalid constant buffer end");
-      end = 16;
-   }
-
-   /* the valid range of buffer offset is [0KB, 15KB] */
-   offset = (offset + 1023) / 1024;
-   if (offset > 15) {
-      assert(!"invalid constant buffer offset");
-      offset = 15;
-   }
-
-   if (offset > end) {
-      assert(!size);
-      offset = end;
-   }
-
-   /* the valid range of buffer size is [0KB, 15KB] */
-   size = end - offset;
-   if (size > 15) {
-      assert(!"invalid constant buffer size");
-      size = 15;
-   }
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
-                    size);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
-                                         int offset, int size,
-                                         struct ilo_cp *cp)
-{
-   gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
-}
-
-static void
-gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
-                               const struct pipe_stream_output_info *so_info,
-                               struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
-   uint16_t cmd_len;
-   int buffer_selects, num_entries, i;
-   uint16_t so_decls[128];
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   buffer_selects = 0;
-   num_entries = 0;
-
-   if (so_info) {
-      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
-
-      memset(buffer_offsets, 0, sizeof(buffer_offsets));
-
-      for (i = 0; i < so_info->num_outputs; i++) {
-         unsigned decl, buf, reg, mask;
-
-         buf = so_info->output[i].output_buffer;
-
-         /* pad with holes */
-         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
-         while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
-            int num_dwords;
-
-            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
-            if (num_dwords > 4)
-               num_dwords = 4;
-
-            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
-                   SO_DECL_HOLE_FLAG |
-                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
-
-            so_decls[num_entries++] = decl;
-            buffer_offsets[buf] += num_dwords;
-         }
-
-         reg = so_info->output[i].register_index;
-         mask = ((1 << so_info->output[i].num_components) - 1) <<
-            so_info->output[i].start_component;
-
-         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
-                reg << SO_DECL_REGISTER_INDEX_SHIFT |
-                mask << SO_DECL_COMPONENT_MASK_SHIFT;
-
-         so_decls[num_entries++] = decl;
-         buffer_selects |= 1 << buf;
-         buffer_offsets[buf] += so_info->output[i].num_components;
-      }
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
-    *
-    *     "Errata: All 128 decls for all four streams must be included
-    *      whenever this command is issued. The "Num Entries [n]" fields still
-    *      contain the actual numbers of valid decls."
-    *
-    * Also note that "DWord Length" has 9 bits for this command, and the type
-    * of cmd_len is thus uint16_t.
-    */
-   cmd_len = 2 * 128 + 3;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
-                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
-                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
-                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
-   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
-                    0 << SO_NUM_ENTRIES_2_SHIFT |
-                    0 << SO_NUM_ENTRIES_1_SHIFT |
-                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
-
-   for (i = 0; i < num_entries; i++) {
-      ilo_cp_write(cp, so_decls[i]);
-      ilo_cp_write(cp, 0);
-   }
-   for (; i < 128; i++) {
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-   }
-
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
-                            int index, int base, int stride,
-                            const struct pipe_stream_output_target *so_target,
-                            struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
-   const uint8_t cmd_len = 4;
-   struct ilo_buffer *buf;
-   int end;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   if (!so_target || !so_target->buffer) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, cmd | (cmd_len - 2));
-      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
-      return;
-   }
-
-   buf = ilo_buffer(so_target->buffer);
-
-   /* DWord-aligned */
-   assert(stride % 4 == 0 && base % 4 == 0);
-   assert(so_target->buffer_offset % 4 == 0);
-
-   stride &= ~3;
-   base = (base + so_target->buffer_offset) & ~3;
-   end = (base + so_target->buffer_size) & ~3;
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
-                    stride);
-   ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-   ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
-   ilo_cp_end(cp);
-}
-
-static void
-gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
-                      const struct pipe_draw_info *info,
-                      const struct ilo_ib_state *ib,
-                      bool rectlist,
-                      struct ilo_cp *cp)
-{
-   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
-   const uint8_t cmd_len = 7;
-   const int prim = (rectlist) ?
-      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
-   const int vb_access = (info->indexed) ?
-      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
-      GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
-   const uint32_t vb_start = info->start +
-      ((info->indexed) ? ib->draw_start_offset : 0);
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, cmd | (cmd_len - 2));
-   ilo_cp_write(cp, vb_access | prim);
-   ilo_cp_write(cp, info->count);
-   ilo_cp_write(cp, vb_start);
-   ilo_cp_write(cp, info->instance_count);
-   ilo_cp_write(cp, info->start_instance);
-   ilo_cp_write(cp, info->index_bias);
-   ilo_cp_end(cp);
-}
-
-static uint32_t
-gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
-                           const struct ilo_viewport_cso *viewports,
-                           unsigned num_viewports,
-                           struct ilo_cp *cp)
-{
-   const int state_align = 64 / 4;
-   const int state_len = 16 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
-
-   ILO_GPE_VALID_GEN(dev, 7, 7);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
-    *
-    *     "The viewport-specific state used by both the SF and CL units
-    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
-    *      of which contains the DWords described below. The start of each
-    *      element is spaced 16 DWords apart. The location of first element of
-    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
-    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
-         state_len, state_align, &state_offset);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->m00);
-      dw[1] = fui(vp->m11);
-      dw[2] = fui(vp->m22);
-      dw[3] = fui(vp->m30);
-      dw[4] = fui(vp->m31);
-      dw[5] = fui(vp->m32);
-      dw[6] = 0;
-      dw[7] = 0;
-      dw[8] = fui(vp->min_gbx);
-      dw[9] = fui(vp->max_gbx);
-      dw[10] = fui(vp->min_gby);
-      dw[11] = fui(vp->max_gby);
-      dw[12] = 0;
-      dw[13] = 0;
-      dw[14] = 0;
-      dw[15] = 0;
-
-      dw += 16;
-   }
-
-   return state_offset;
-}
-
 void
 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
                                     unsigned width, unsigned height,
index 321201548867b1f121ef000339cc187b0bc9e36f..e9ddf18a28401f87e1df645583e16c81b7055708 100644 (file)
@@ -490,4 +490,1048 @@ struct ilo_gpe_gen7 {
 const struct ilo_gpe_gen7 *
 ilo_gpe_gen7_get(void);
 
+/**
+ * Placeholder for GPGPU_WALKER.  The command is not implemented: the body
+ * only asserts with "GPGPU_WALKER unsupported" and writes nothing to cp.
+ */
+static inline void
+gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
+                       struct ilo_cp *cp)
+{
+   assert(!"GPGPU_WALKER unsupported");
+}
+
+/**
+ * Emit 3DSTATE_CLEAR_PARAMS (three dwords): the command header, followed by
+ * clear_val and a constant 1.
+ */
+static inline void
+gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
+                               uint32_t clear_val,
+                               struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x04);
+   const uint8_t num_dwords = 3;
+   /* the length field holds the total dword count minus two */
+   const uint32_t header = opcode | (num_dwords - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, clear_val);
+   /* NOTE(review): presumably the "clear value valid" flag -- confirm */
+   ilo_cp_write(cp, 1);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Shared helper for the two-dword 3DSTATE_*_POINTERS commands: emits a
+ * header built from subop, followed by the single dword pointer.
+ */
+static inline void
+gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
+                          int subop, uint32_t pointer,
+                          struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
+   const uint8_t num_dwords = 2;
+   /* the length field holds the total dword count minus two */
+   const uint32_t header = opcode | (num_dwords - 2);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, header);
+   ilo_cp_write(cp, pointer);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CC_STATE_POINTERS: forwards color_calc_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x0e.
+ */
+static inline void
+gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                    uint32_t color_calc_state,
+                                    struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_GS (seven dwords).
+ *
+ * Without a geometry shader (gs is NULL) every payload dword is zero except
+ * the one carrying GEN6_GS_STATISTICS_ENABLE.  With a shader, the dwords
+ * come from the kernel CSO payload, with the sampler count (in groups of
+ * four, rounded up) merged into the first of them.
+ */
+static inline void
+gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *gs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x11);
+   const uint8_t num_dwords = 7;
+   /* defaults describe the "no geometry shader" case */
+   uint32_t dw2 = 0;
+   uint32_t dw4 = 0;
+   uint32_t dw5 = GEN6_GS_STATISTICS_ENABLE;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (gs) {
+      const struct ilo_shader_cso *kernel = ilo_shader_get_kernel_cso(gs);
+
+      dw2 = kernel->payload[0];
+      dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+      dw4 = kernel->payload[1];
+      dw5 = kernel->payload[2];
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, (gs) ? ilo_shader_get_kernel_offset(gs) : 0);
+   ilo_cp_write(cp, dw2);
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, dw4);
+   ilo_cp_write(cp, dw5);
+   ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_SF (seven dwords).  The six payload dwords are produced by
+ * ilo_gpe_gen6_fill_3dstate_sf_raster() from the rasterizer state, a fixed
+ * sample count of 1, and the format of zs_surf (PIPE_FORMAT_NONE when
+ * zs_surf is NULL).
+ */
+static inline void
+gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     const struct pipe_surface *zs_surf,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x13);
+   const uint8_t num_dwords = 7;
+   const int num_samples = 1;
+   enum pipe_format zs_format = PIPE_FORMAT_NONE;
+   uint32_t dw[6];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (zs_surf)
+      zs_format = zs_surf->format;
+
+   ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
+         rasterizer, num_samples, zs_format,
+         dw, Elements(dw));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* everything after the header: six dwords */
+   ilo_cp_write_multi(cp, dw, num_dwords - 1);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_WM (three dwords).
+ *
+ * Both payload dwords start from the values precomputed in
+ * rasterizer->wm.payload[].  The first one additionally gets the statistics
+ * bit, the fragment shader's CSO payload[3] when fs is non-NULL, and the
+ * dispatch/kill bits when cc_may_kill is set.  The sample count is fixed at
+ * 1, so the MSAA bits are never merged in.
+ */
+static inline void
+gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     const struct ilo_rasterizer_state *rasterizer,
+                     bool cc_may_kill,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x14);
+   const uint8_t num_dwords = 3;
+   const int num_samples = 1;
+   uint32_t wm_dw1, wm_dw2;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* see ilo_gpe_init_rasterizer_wm() */
+   wm_dw1 = rasterizer->wm.payload[0] | GEN7_WM_STATISTICS_ENABLE;
+   wm_dw2 = rasterizer->wm.payload[1];
+
+   /* deliberately disabled: depth clear/resolve bits are never set */
+   if (false) {
+      wm_dw1 |= GEN7_WM_DEPTH_CLEAR |
+                GEN7_WM_DEPTH_RESOLVE |
+                GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+   }
+
+   if (fs)
+      wm_dw1 |= ilo_shader_get_kernel_cso(fs)->payload[3];
+
+   if (cc_may_kill)
+      wm_dw1 |= GEN7_WM_DISPATCH_ENABLE | GEN7_WM_KILL_ENABLE;
+
+   if (num_samples > 1) {
+      wm_dw1 |= rasterizer->wm.dw_msaa_rast;
+      wm_dw2 |= rasterizer->wm.dw_msaa_disp;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, wm_dw1);
+   ilo_cp_write(cp, wm_dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Shared helper for the 3DSTATE_CONSTANT_* commands (seven dwords).
+ *
+ * subop selects the variant.  bufs[] and sizes[] describe up to four
+ * constant buffers, of which the first num_bufs entries are considered.
+ * Buffers are enabled in order until the first one that is missing or has
+ * a zero size; every slot from there on is programmed as disabled.
+ */
+static inline void
+gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
+                           int subop,
+                           const uint32_t *bufs, const int *sizes,
+                           int num_bufs,
+                           struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
+   const uint8_t num_dwords = 7;
+   uint32_t payload[6];
+   int read_len_sum = 0;
+   int slot;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);
+
+   assert(num_bufs <= 4);
+
+   /* start with all read lengths and buffer addresses cleared */
+   for (slot = 0; slot < 6; slot++)
+      payload[slot] = 0;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
+    *
+    *     "Constant buffers must be enabled in order from Constant Buffer 0
+    *      to Constant Buffer 3 within this command.  For example, it is
+    *      not allowed to enable Constant Buffer 1 by programming a
+    *      non-zero value in the VS Constant Buffer 1 Read Length without a
+    *      non-zero value in VS Constant Buffer 0 Read Length."
+    *
+    * Hence stop at the first buffer that is absent or empty.
+    */
+   for (slot = 0; slot < 4 && slot < num_bufs && sizes[slot]; slot++) {
+      /* read lengths are in 256-bit units */
+      const int read_len = (sizes[slot] + 31) / 32;
+      /* two 16-bit read lengths are packed per dword */
+      const int shift = (slot % 2) ? 16 : 0;
+
+      /* the lower 5 bits are used for memory object control state */
+      assert(bufs[slot] % 32 == 0);
+
+      payload[slot / 2] |= read_len << shift;
+      payload[2 + slot] = bufs[slot];
+
+      read_len_sum += read_len;
+   }
+
+   /* nothing from the first disabled buffer onwards may be enabled */
+   for (; slot < 4; slot++)
+      assert(slot >= num_bufs || !sizes[slot]);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
+    *
+    *     "The sum of all four read length fields must be less than or equal
+    *      to the size of 64"
+    */
+   assert(read_len_sum <= 64);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write_multi(cp, payload, num_dwords - 1);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_VS: forwards bufs, sizes and num_bufs unchanged to
+ * gen7_emit_3dstate_constant() with sub-opcode 0x15.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_GS: forwards bufs, sizes and num_bufs unchanged to
+ * gen7_emit_3dstate_constant() with sub-opcode 0x16.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_PS: forwards bufs, sizes and num_bufs unchanged to
+ * gen7_emit_3dstate_constant() with sub-opcode 0x17.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLE_MASK (two dwords).  sample_mask is restricted to the
+ * low num_samples bits before being written; bit 0 is always allowed.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
+                              unsigned sample_mask,
+                              int num_samples,
+                              struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x18);
+   const uint8_t num_dwords = 2;
+   /* one bit per sample, and never less than bit 0 */
+   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *      (Sample Mask) must be zero.
+    *
+    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *      must be zero."
+    */
+   ilo_cp_write(cp, sample_mask & allowed_bits);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_HS: forwards bufs, sizes and num_bufs unchanged to
+ * gen7_emit_3dstate_constant() with sub-opcode 0x19.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_DS: forwards bufs, sizes and num_bufs unchanged to
+ * gen7_emit_3dstate_constant() with sub-opcode 0x1a.
+ */
+static inline void
+gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
+                              const uint32_t *bufs, const int *sizes,
+                              int num_bufs,
+                              struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
+}
+
+/**
+ * Emit 3DSTATE_HS (seven dwords) with an all-zero payload.  A hull shader
+ * is not supported: hs is asserted to be NULL, and num_samplers is unused.
+ */
+static inline void
+gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *hs,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1b);
+   const uint8_t num_dwords = 7;
+   int dw;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!hs);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* every dword after the header is zero */
+   for (dw = 1; dw < num_dwords; dw++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_TE (four dwords) with an all-zero payload.
+ */
+static inline void
+gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1c);
+   const uint8_t num_dwords = 4;
+   int dw;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* every dword after the header is zero */
+   for (dw = 1; dw < num_dwords; dw++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_DS (six dwords) with an all-zero payload.  A domain shader
+ * is not supported: ds is asserted to be NULL, and num_samplers is unused.
+ */
+static inline void
+gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *ds,
+                     int num_samplers,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1d);
+   const uint8_t num_dwords = 6;
+   int dw;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   assert(!ds);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* every dword after the header is zero */
+   for (dw = 1; dw < num_dwords; dw++)
+      ilo_cp_write(cp, 0);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_STREAMOUT (three dwords).
+ *
+ * Stream output is enabled when buffer_mask is non-zero; the mask is then
+ * placed at bit 8 of the first payload dword and the stream 0 vertex read
+ * length is derived from vertex_attrib_count (two attributes per unit, at
+ * least one unit).  rasterizer_discard sets SO_RENDERING_DISABLE whether or
+ * not stream output is enabled.
+ */
+static inline void
+gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
+                            unsigned buffer_mask,
+                            int vertex_attrib_count,
+                            bool rasterizer_discard,
+                            struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1e);
+   const uint8_t num_dwords = 3;
+   uint32_t so_dw1, so_dw2;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* common to both cases: render stream 0, optional discard */
+   so_dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
+   if (rasterizer_discard)
+      so_dw1 |= SO_RENDERING_DISABLE;
+
+   so_dw2 = 0;
+
+   if (buffer_mask != 0) {
+      int read_len = (vertex_attrib_count + 1) / 2;
+
+      if (read_len == 0)
+         read_len = 1;
+
+      so_dw1 |= SO_FUNCTION_ENABLE |
+                SO_STATISTICS_ENABLE |
+                buffer_mask << 8;
+
+      /* API_OPENGL */
+      so_dw1 |= SO_REORDER_TRAILING;
+
+      /* only stream 0 is used; read offsets and other streams stay zero */
+      so_dw2 = (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, so_dw1);
+   ilo_cp_write(cp, so_dw2);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_SBE (fourteen dwords).  The thirteen payload dwords are
+ * filled in by ilo_gpe_gen6_fill_3dstate_sf_sbe() from the rasterizer
+ * state, the fragment shader fs, and last_sh.
+ */
+static inline void
+gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
+                      const struct ilo_rasterizer_state *rasterizer,
+                      const struct ilo_shader_state *fs,
+                      const struct ilo_shader_state *last_sh,
+                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1f);
+   const uint8_t num_dwords = 14;
+   uint32_t payload[13];
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+         fs, last_sh, payload, Elements(payload));
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   /* everything after the header: thirteen dwords */
+   ilo_cp_write_multi(cp, payload, num_dwords - 1);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_PS (eight dwords).
+ *
+ * Without a fragment shader (fs is NULL) the payload is zero except for the
+ * maximum thread count and the 8-wide dispatch bit; dual_blend is ignored
+ * in that case.  With a shader, the dwords come from the kernel CSO payload
+ * with the sampler count (in groups of four, rounded up) and, when
+ * dual_blend is set, the dual-source-blend bit merged in.
+ */
+static inline void
+gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
+                     const struct ilo_shader_state *fs,
+                     int num_samplers, bool dual_blend,
+                     struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x20);
+   const uint8_t num_dwords = 8;
+   uint32_t ps_dw2, ps_dw4, ps_dw5;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   if (fs) {
+      const struct ilo_shader_cso *kernel = ilo_shader_get_kernel_cso(fs);
+
+      ps_dw2 = kernel->payload[0];
+      ps_dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
+
+      ps_dw4 = kernel->payload[1];
+      if (dual_blend)
+         ps_dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
+
+      ps_dw5 = kernel->payload[2];
+   }
+   else {
+      /* see brwCreateContext() */
+      const int max_threads = (dev->gt == 2) ? 172 : 48;
+
+      ps_dw2 = 0;
+      /* GPU hangs if none of the dispatch enable bits is set */
+      ps_dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
+               GEN7_PS_8_DISPATCH_ENABLE;
+      ps_dw5 = 0;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, (fs) ? ilo_shader_get_kernel_offset(fs) : 0);
+   ilo_cp_write(cp, ps_dw2);
+   ilo_cp_write(cp, 0); /* scratch */
+   ilo_cp_write(cp, ps_dw4);
+   ilo_cp_write(cp, ps_dw5);
+   ilo_cp_write(cp, 0); /* kernel 1 */
+   ilo_cp_write(cp, 0); /* kernel 2 */
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: forwards sf_clip_viewport
+ * to gen7_emit_3dstate_pointer() with sub-opcode 0x21.
+ */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
+                                                  uint32_t sf_clip_viewport,
+                                                  struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
+}
+
+/**
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: forwards cc_viewport to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x23.
+ */
+static inline void
+gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
+                                             uint32_t cc_viewport,
+                                             struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
+}
+
+/**
+ * Emit 3DSTATE_BLEND_STATE_POINTERS: forwards blend_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x24.
+ */
+static inline void
+gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                       uint32_t blend_state,
+                                       struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: forwards depth_stencil_state
+ * to gen7_emit_3dstate_pointer() with sub-opcode 0x25.
+ */
+static inline void
+gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
+                                               uint32_t depth_stencil_state,
+                                               struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: forwards binding_table to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x26.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: forwards binding_table to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x27.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: forwards binding_table to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x28.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: forwards binding_table to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x29.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: forwards binding_table to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2a.
+ */
+static inline void
+gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t binding_table,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: forwards sampler_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2b.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: forwards sampler_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2c.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: forwards sampler_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2d.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: forwards sampler_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2e.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
+}
+
+/**
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: forwards sampler_state to
+ * gen7_emit_3dstate_pointer() with sub-opcode 0x2f.
+ */
+static inline void
+gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
+                                            uint32_t sampler_state,
+                                            struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
+}
+
+/**
+ * Shared helper for the 3DSTATE_URB_* commands (two dwords).
+ *
+ * offset must be a multiple of 8192 and is programmed in 8KB units.
+ * entry_size is rounded up to whole 64-byte rows (at least one).  The
+ * number of entries is what fits in size, rounded down to a multiple of 8
+ * and then clamped to the per-stage maximum where one applies.
+ */
+static inline void
+gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
+                      int subop, int offset, int size,
+                      int entry_size,
+                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
+   const uint8_t num_dwords = 2;
+   const int row_size = 64; /* 512 bits */
+   int start, rows_per_entry, num_entries;
+   int max_entries = 0; /* zero: no upper bound applied */
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, and GS variants */
+   assert(subop >= 0x30 && subop <= 0x33);
+
+   /* in multiples of 8KB */
+   assert(offset % 8192 == 0);
+   start = offset / 8192;
+
+   /* in multiple of 512-bit rows */
+   rows_per_entry = (entry_size + row_size - 1) / row_size;
+   if (rows_per_entry == 0)
+      rows_per_entry = 1;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+    *      cause performance to decrease due to banking in the URB. Element
+    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+    */
+   if (subop == 0x30 && rows_per_entry == 5)
+      rows_per_entry = 6;
+
+   /* in multiples of 8 */
+   num_entries = (size / row_size / rows_per_entry) & ~7;
+
+   switch (subop) {
+   case 0x30: /* 3DSTATE_URB_VS */
+      /* the VS needs at least 32 entries */
+      assert(num_entries >= 32);
+      max_entries = (dev->gt == 2) ? 704 : 512;
+      break;
+   case 0x31: /* 3DSTATE_URB_HS */
+      max_entries = (dev->gt == 2) ? 64 : 32;
+      break;
+   case 0x32: /* 3DSTATE_URB_DS */
+      /* either no entries at all, or at least 138 */
+      assert(!num_entries || num_entries >= 138);
+      break;
+   case 0x33: /* 3DSTATE_URB_GS */
+      max_entries = (dev->gt == 2) ? 320 : 192;
+      break;
+   default:
+      break;
+   }
+
+   if (max_entries && num_entries > max_entries)
+      num_entries = max_entries;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, start << GEN7_URB_STARTING_ADDRESS_SHIFT |
+                    (rows_per_entry - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+                    num_entries);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_VS: forwards offset, size and entry_size unchanged to
+ * gen7_emit_3dstate_urb() with sub-opcode 0x30.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_HS: forwards offset, size and entry_size unchanged to
+ * gen7_emit_3dstate_urb() with sub-opcode 0x31.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_DS: forwards offset, size and entry_size unchanged to
+ * gen7_emit_3dstate_urb() with sub-opcode 0x32.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
+}
+
+/**
+ * Emit 3DSTATE_URB_GS: forwards offset, size and entry_size unchanged to
+ * gen7_emit_3dstate_urb() with sub-opcode 0x33.
+ */
+static inline void
+gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
+                         int offset, int size, int entry_size,
+                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
+}
+
+/**
+ * Shared helper for the 3DSTATE_PUSH_CONSTANT_ALLOC_* commands (two
+ * dwords).
+ *
+ * offset and size are given in bytes and programmed in KB.  The end of the
+ * range is rounded down and the start rounded up to a KB boundary; values
+ * outside the ranges noted below are asserted on and then clamped.
+ */
+static inline void
+gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
+                                      int subop, int offset, int size,
+                                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x1, subop);
+   const uint8_t num_dwords = 2;
+   int start_kb, end_kb, size_kb;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* VS, HS, DS, GS, and PS variants */
+   assert(subop >= 0x12 && subop <= 0x16);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
+    *
+    *     (A table that says the maximum size of each constant buffer is
+    *      16KB)
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
+    *
+    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
+    *      may not exceed the maximum value of the Constant Buffer Size."
+    *
+    * Thus, the valid range of buffer end is [0KB, 16KB].
+    */
+   end_kb = (offset + size) / 1024;
+   if (end_kb > 16) {
+      assert(!"invalid constant buffer end");
+      end_kb = 16;
+   }
+
+   /* the valid range of buffer offset is [0KB, 15KB] */
+   start_kb = (offset + 1023) / 1024;
+   if (start_kb > 15) {
+      assert(!"invalid constant buffer offset");
+      start_kb = 15;
+   }
+
+   /* rounding may push the start past the end only for an empty range */
+   if (start_kb > end_kb) {
+      assert(!size);
+      start_kb = end_kb;
+   }
+
+   /* the valid range of buffer size is [0KB, 15KB] */
+   size_kb = end_kb - start_kb;
+   if (size_kb > 15) {
+      assert(!"invalid constant buffer size");
+      size_kb = 15;
+   }
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, start_kb << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
+                    size_kb);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: forwards offset and size unchanged
+ * to gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x12.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: forwards offset and size unchanged
+ * to gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x13.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: forwards offset and size unchanged
+ * to gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x14.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: forwards offset and size unchanged
+ * to gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x15.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: forwards offset and size unchanged
+ * to gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x16.
+ */
+static inline void
+gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
+                                         int offset, int size,
+                                         struct ilo_cp *cp)
+{
+   gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
+}
+
+/**
+ * Emit 3DSTATE_SO_DECL_LIST.
+ *
+ * When so_info is non-NULL, one decl is generated per stream output, with
+ * hole decls inserted wherever an output's dst_offset lies beyond what has
+ * been written to its buffer so far.  Only stream 0 is used: the entry
+ * counts and buffer selects of streams 1 to 3 are always zero.  The command
+ * is always emitted with 128 decl slots; unused slots are written as zero.
+ *
+ * At most 128 decls fit.  If the holes and outputs together would need
+ * more, the list is truncated (after asserting in debug builds) instead of
+ * writing past the end of the local decl array.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
+                               const struct pipe_stream_output_info *so_info,
+                               struct ilo_cp *cp)
+{
+   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
+   uint16_t cmd_len;
+   int buffer_selects, num_entries, i;
+   uint16_t so_decls[128];
+   const int max_entries = Elements(so_decls);
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   buffer_selects = 0;
+   num_entries = 0;
+
+   if (so_info) {
+      int buffer_offsets[PIPE_MAX_SO_BUFFERS];
+
+      memset(buffer_offsets, 0, sizeof(buffer_offsets));
+
+      for (i = 0; i < so_info->num_outputs; i++) {
+         unsigned decl, buf, reg, mask;
+
+         buf = so_info->output[i].output_buffer;
+
+         /* pad with holes, each covering at most four dwords */
+         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
+         while (buffer_offsets[buf] < so_info->output[i].dst_offset &&
+                num_entries < max_entries) {
+            int num_dwords;
+
+            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
+            if (num_dwords > 4)
+               num_dwords = 4;
+
+            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                   SO_DECL_HOLE_FLAG |
+                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
+
+            so_decls[num_entries++] = decl;
+            buffer_offsets[buf] += num_dwords;
+         }
+
+         /* no room left for this output's decl: stop instead of overflowing */
+         if (num_entries >= max_entries) {
+            assert(!"too many SO decls");
+            break;
+         }
+
+         reg = so_info->output[i].register_index;
+         mask = ((1 << so_info->output[i].num_components) - 1) <<
+            so_info->output[i].start_component;
+
+         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
+                reg << SO_DECL_REGISTER_INDEX_SHIFT |
+                mask << SO_DECL_COMPONENT_MASK_SHIFT;
+
+         so_decls[num_entries++] = decl;
+         buffer_selects |= 1 << buf;
+         buffer_offsets[buf] += so_info->output[i].num_components;
+      }
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+    *
+    *     "Errata: All 128 decls for all four streams must be included
+    *      whenever this command is issued. The "Num Entries [n]" fields still
+    *      contain the actual numbers of valid decls."
+    *
+    * Also note that "DWord Length" has 9 bits for this command, and the type
+    * of cmd_len is thus uint16_t.
+    */
+   cmd_len = 2 * 128 + 3;
+
+   ilo_cp_begin(cp, cmd_len);
+   ilo_cp_write(cp, cmd | (cmd_len - 2));
+   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
+                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
+                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
+   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
+                    0 << SO_NUM_ENTRIES_2_SHIFT |
+                    0 << SO_NUM_ENTRIES_1_SHIFT |
+                    num_entries << SO_NUM_ENTRIES_0_SHIFT);
+
+   /* two dwords per slot: the stream 0 decl, then a zero dword */
+   for (i = 0; i < num_entries; i++) {
+      ilo_cp_write(cp, so_decls[i]);
+      ilo_cp_write(cp, 0);
+   }
+   /* pad out to the full 128 slots required by the errata above */
+   for (; i < max_entries; i++) {
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+   }
+
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DSTATE_SO_BUFFER (four dwords) for the buffer slot index.
+ *
+ * Without a target, or with a target that has no buffer, only the slot
+ * index is programmed and the remaining two dwords are zero.  Otherwise
+ * the stride and the start/end of the range are written, the latter two as
+ * offsets into the buffer's bo.  stride, base and the target's
+ * buffer_offset are expected to be dword-aligned.
+ */
+static inline void
+gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
+                            int index, int base, int stride,
+                            const struct pipe_stream_output_target *so_target,
+                            struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x1, 0x18);
+   const uint8_t num_dwords = 4;
+   struct ilo_buffer *target_buf;
+   int range_start, range_end;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /* nothing bound to this slot */
+   if (!so_target || !so_target->buffer) {
+      ilo_cp_begin(cp, num_dwords);
+      ilo_cp_write(cp, opcode | (num_dwords - 2));
+      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
+      ilo_cp_write(cp, 0);
+      ilo_cp_write(cp, 0);
+      ilo_cp_end(cp);
+      return;
+   }
+
+   target_buf = ilo_buffer(so_target->buffer);
+
+   /* DWord-aligned */
+   assert(stride % 4 == 0 && base % 4 == 0);
+   assert(so_target->buffer_offset % 4 == 0);
+
+   /* drop the low two bits regardless, in case asserts are compiled out */
+   range_start = (base + so_target->buffer_offset) & ~3;
+   range_end = (range_start + so_target->buffer_size) & ~3;
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
+                    (stride & ~3));
+   ilo_cp_write_bo(cp, range_start, target_buf->bo,
+                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_write_bo(cp, range_end, target_buf->bo,
+                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Emit 3DPRIMITIVE (seven dwords) for the draw described by info.
+ *
+ * The primitive type is _3DPRIM_RECTLIST when rectlist is set, otherwise it
+ * is translated from info->mode.  Indexed draws use random vertex buffer
+ * access and add ib->draw_start_offset to the start vertex; non-indexed
+ * draws use sequential access and do not read ib.
+ */
+static inline void
+gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
+                      const struct pipe_draw_info *info,
+                      const struct ilo_ib_state *ib,
+                      bool rectlist,
+                      struct ilo_cp *cp)
+{
+   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x3, 0x00);
+   const uint8_t num_dwords = 7;
+   uint32_t prim_dw, first_vertex;
+
+   prim_dw = (rectlist) ?
+      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
+   first_vertex = info->start;
+
+   if (info->indexed) {
+      prim_dw |= GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+      first_vertex += ib->draw_start_offset;
+   }
+   else {
+      prim_dw |= GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+   }
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   ilo_cp_begin(cp, num_dwords);
+   ilo_cp_write(cp, opcode | (num_dwords - 2));
+   ilo_cp_write(cp, prim_dw);
+   ilo_cp_write(cp, info->count);
+   ilo_cp_write(cp, first_vertex);
+   ilo_cp_write(cp, info->instance_count);
+   ilo_cp_write(cp, info->start_instance);
+   ilo_cp_write(cp, info->index_bias);
+   ilo_cp_end(cp);
+}
+
+/**
+ * Write an SF_CLIP_VIEWPORT array of num_viewports elements into space
+ * obtained from ilo_cp_steal_ptr() and return the state offset reported by
+ * that call.  Each element is 16 dwords: six viewport matrix values, the
+ * four guardband bounds, and zero for every remaining dword.
+ */
+static inline uint32_t
+gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
+                           const struct ilo_viewport_cso *viewports,
+                           unsigned num_viewports,
+                           struct ilo_cp *cp)
+{
+   const int state_align = 64 / 4;
+   const int state_len = 16 * num_viewports;
+   uint32_t state_offset, *state;
+   unsigned vp_idx;
+
+   ILO_GPE_VALID_GEN(dev, 7, 7);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
+    *
+    *     "The viewport-specific state used by both the SF and CL units
+    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
+    *      of which contains the DWords described below. The start of each
+    *      element is spaced 16 DWords apart. The location of first element of
+    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
+    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
+    */
+   assert(num_viewports && num_viewports <= 16);
+
+   state = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
+         state_len, state_align, &state_offset);
+
+   for (vp_idx = 0; vp_idx < num_viewports; vp_idx++) {
+      const struct ilo_viewport_cso *vp = &viewports[vp_idx];
+      /* elements are spaced 16 dwords apart */
+      uint32_t *elem = state + 16 * vp_idx;
+      int pad;
+
+      /* viewport matrix */
+      elem[0] = fui(vp->m00);
+      elem[1] = fui(vp->m11);
+      elem[2] = fui(vp->m22);
+      elem[3] = fui(vp->m30);
+      elem[4] = fui(vp->m31);
+      elem[5] = fui(vp->m32);
+      elem[6] = 0;
+      elem[7] = 0;
+
+      /* guardband */
+      elem[8] = fui(vp->min_gbx);
+      elem[9] = fui(vp->max_gbx);
+      elem[10] = fui(vp->min_gby);
+      elem[11] = fui(vp->max_gby);
+
+      /* the rest of the element is zero */
+      for (pad = 12; pad < 16; pad++)
+         elem[pad] = 0;
+   }
+
+   return state_offset;
+}
+
 #endif /* ILO_GPE_GEN7_H */