ilo: convert GPE GEN6 command functions to use ilo_builder
author Chia-I Wu <olvaffe@gmail.com>
Sun, 7 Sep 2014 15:45:03 +0000 (23:45 +0800)
committer Chia-I Wu <olvaffe@gmail.com>
Tue, 9 Sep 2014 05:31:37 +0000 (13:31 +0800)
Similar to the changes made to the GEN7 command functions, but for GEN6 this time.

As every GPE function has been converted, remove the
ilo_cp_assert_no_implicit_flush() calls.

src/gallium/drivers/ilo/ilo_3d_pipeline.c
src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
src/gallium/drivers/ilo/ilo_gpe_gen6.h

index bea5406404f391330517234841a1b4db9c01b373..26d3ec3e327de12c6e38aec91f6492ba3f92a3c5 100644 (file)
@@ -180,9 +180,7 @@ ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
       handle_invalid_batch_bo(p, false);
 
       /* draw! */
-      ilo_cp_assert_no_implicit_flush(p->cp, true);
       p->emit_draw(p, ilo);
-      ilo_cp_assert_no_implicit_flush(p->cp, false);
 
       if (ilo_builder_validate(&ilo->cp->builder, 0, NULL)) {
          success = true;
@@ -286,9 +284,7 @@ ilo_3d_pipeline_emit_rectlist(struct ilo_3d_pipeline *p,
 
       handle_invalid_batch_bo(p, false);
 
-      ilo_cp_assert_no_implicit_flush(p->cp, true);
       p->emit_rectlist(p, blitter);
-      ilo_cp_assert_no_implicit_flush(p->cp, false);
 
       if (!ilo_builder_validate(&p->cp->builder, 0, NULL)) {
          /* rewind */
index 27a44ab35971da24a4a8c1d0c58b23d35fca846e..4ff00a170308d1e9f18761d46cb6f5abf57e20c5 100644 (file)
@@ -66,10 +66,10 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
     *
     * The workaround below necessitates this workaround.
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_CS_STALL |
          GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
    /* the caller will emit the post-sync op */
    if (caller_post_sync)
@@ -85,9 +85,9 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
     *     "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
     *      PIPE_CONTROL with any non-zero post-sync-op is required."
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_WRITE_IMM,
-         p->workaround_bo, 0, false, p->cp);
+         p->workaround_bo, 0, false);
 }
 
 static void
@@ -105,10 +105,10 @@ gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
     *      requires driver to send a PIPE_CONTROL with a CS stall along with a
     *      Depth Flush prior to this command."
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
          GEN6_PIPE_CONTROL_CS_STALL,
-         0, 0, false, p->cp);
+         0, 0, false);
 }
 
 static void
@@ -123,17 +123,17 @@ gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
     * to emit a sequence of PIPE_CONTROLs prior to emitting depth related
     * commands.
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 }
 
 static void
@@ -152,9 +152,9 @@ gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
     *      field set (DW1 Bit 1), must be issued prior to any change to the
     *      value in this field (Maximum Number of Threads in 3DSTATE_WM)"
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
 }
 
@@ -170,11 +170,11 @@ gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
     * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
     * buffered by VS FF, to the point that the FF dies.
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL |
          GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
          GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 }
 
 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -189,7 +189,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_PIPELINE_SELECT(p->dev, 0x0, p->cp);
+      gen6_PIPELINE_SELECT(&p->cp->builder, 0x0);
    }
 }
 
@@ -203,7 +203,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_STATE_SIP(p->dev, 0, p->cp);
+      gen6_STATE_SIP(&p->cp->builder, 0);
    }
 }
 
@@ -313,8 +313,8 @@ gen6_pipeline_common_urb(struct ilo_3d_pipeline *p,
          gs_total_size = 0;
       }
 
-      gen6_emit_3DSTATE_URB(p->dev, vs_total_size, gs_total_size,
-            vs_entry_size, gs_entry_size, p->cp);
+      gen6_3DSTATE_URB(&p->cp->builder, vs_total_size, gs_total_size,
+            vs_entry_size, gs_entry_size);
 
       /*
        * From the Sandy Bridge PRM, volume 2 part 1, page 27:
@@ -339,10 +339,10 @@ gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_VIEWPORT_STATE_POINTERS */
    if (session->viewport_state_changed) {
-      gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
+      gen6_3DSTATE_VIEWPORT_STATE_POINTERS(&p->cp->builder,
             p->state.CLIP_VIEWPORT,
             p->state.SF_VIEWPORT,
-            p->state.CC_VIEWPORT, p->cp);
+            p->state.CC_VIEWPORT);
    }
 }
 
@@ -355,20 +355,20 @@ gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
    if (session->cc_state_blend_changed ||
        session->cc_state_dsa_changed ||
        session->cc_state_cc_changed) {
-      gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev,
+      gen6_3DSTATE_CC_STATE_POINTERS(&p->cp->builder,
             p->state.BLEND_STATE,
             p->state.DEPTH_STENCIL_STATE,
-            p->state.COLOR_CALC_STATE, p->cp);
+            p->state.COLOR_CALC_STATE);
    }
 
    /* 3DSTATE_SAMPLER_STATE_POINTERS */
    if (session->sampler_state_vs_changed ||
        session->sampler_state_gs_changed ||
        session->sampler_state_fs_changed) {
-      gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(p->dev,
+      gen6_3DSTATE_SAMPLER_STATE_POINTERS(&p->cp->builder,
             p->state.vs.SAMPLER_STATE,
             0,
-            p->state.wm.SAMPLER_STATE, p->cp);
+            p->state.wm.SAMPLER_STATE);
    }
 }
 
@@ -379,18 +379,18 @@ gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
    if (session->scissor_state_changed) {
-      gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
-            p->state.SCISSOR_RECT, p->cp);
+      gen6_3DSTATE_SCISSOR_STATE_POINTERS(&p->cp->builder,
+            p->state.SCISSOR_RECT);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS */
    if (session->binding_table_vs_changed ||
        session->binding_table_gs_changed ||
        session->binding_table_fs_changed) {
-      gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(p->dev,
+      gen6_3DSTATE_BINDING_TABLE_POINTERS(&p->cp->builder,
             p->state.vs.BINDING_TABLE_STATE,
             p->state.gs.BINDING_TABLE_STATE,
-            p->state.wm.BINDING_TABLE_STATE, p->cp);
+            p->state.wm.BINDING_TABLE_STATE);
    }
 }
 
@@ -402,8 +402,8 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
    if (p->dev->gen >= ILO_GEN(7.5)) {
       /* 3DSTATE_INDEX_BUFFER */
       if (DIRTY(IB) || session->batch_bo_changed) {
-         gen6_emit_3DSTATE_INDEX_BUFFER(p->dev,
-               &ilo->ib, false, p->cp);
+         gen6_3DSTATE_INDEX_BUFFER(&p->cp->builder,
+               &ilo->ib, false);
       }
 
       /* 3DSTATE_VF */
@@ -416,14 +416,14 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
       /* 3DSTATE_INDEX_BUFFER */
       if (DIRTY(IB) || session->primitive_restart_changed ||
           session->batch_bo_changed) {
-         gen6_emit_3DSTATE_INDEX_BUFFER(p->dev,
-               &ilo->ib, ilo->draw->primitive_restart, p->cp);
+         gen6_3DSTATE_INDEX_BUFFER(&p->cp->builder,
+               &ilo->ib, ilo->draw->primitive_restart);
       }
    }
 
    /* 3DSTATE_VERTEX_BUFFERS */
    if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed)
-      gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev, ilo->ve, &ilo->vb, p->cp);
+      gen6_3DSTATE_VERTEX_BUFFERS(&p->cp->builder, ilo->ve, &ilo->vb);
 
    /* 3DSTATE_VERTEX_ELEMENTS */
    if (DIRTY(VE) || DIRTY(VS)) {
@@ -448,8 +448,8 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p,
             prepend_generate_ids = true;
       }
 
-      gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev, ve,
-            last_velement_edgeflag, prepend_generate_ids, p->cp);
+      gen6_3DSTATE_VERTEX_ELEMENTS(&p->cp->builder, ve,
+            last_velement_edgeflag, prepend_generate_ids);
    }
 }
 
@@ -460,7 +460,7 @@ gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_VF_STATISTICS */
    if (session->hw_ctx_changed)
-      gen6_emit_3DSTATE_VF_STATISTICS(p->dev, false, p->cp);
+      gen6_3DSTATE_VF_STATISTICS(&p->cp->builder, false);
 }
 
 static void
@@ -469,7 +469,7 @@ gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
                       struct gen6_pipeline_session *session)
 {
    /* 3DPRIMITIVE */
-   gen6_emit_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);
+   gen6_3DPRIMITIVE(&p->cp->builder, ilo->draw, &ilo->ib, false);
    p->state.has_gen6_wa_pipe_control = false;
 }
 
@@ -491,17 +491,17 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
 
    /* 3DSTATE_CONSTANT_VS */
    if (emit_3dstate_constant_vs) {
-      gen6_emit_3DSTATE_CONSTANT_VS(p->dev,
+      gen6_3DSTATE_CONSTANT_VS(&p->cp->builder,
             &p->state.vs.PUSH_CONSTANT_BUFFER,
             &p->state.vs.PUSH_CONSTANT_BUFFER_size,
-            1, p->cp);
+            1);
    }
 
    /* 3DSTATE_VS */
    if (emit_3dstate_vs) {
       const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;
 
-      gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
+      gen6_3DSTATE_VS(&p->cp->builder, ilo->vs, num_samplers);
    }
 
    if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6))
@@ -515,14 +515,14 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_CONSTANT_GS */
    if (session->pcb_state_gs_changed)
-      gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
+      gen6_3DSTATE_CONSTANT_GS(&p->cp->builder, NULL, NULL, 0);
 
    /* 3DSTATE_GS */
    if (DIRTY(GS) || DIRTY(VS) ||
        session->prim_changed || session->kernel_bo_changed) {
       const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
 
-      gen6_emit_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
+      gen6_3DSTATE_GS(&p->cp->builder, ilo->gs, ilo->vs, verts_per_prim);
    }
 }
 
@@ -582,9 +582,9 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev,
+      gen6_3DSTATE_GS_SVB_INDEX(&p->cp->builder,
             0, p->state.so_num_vertices, p->state.so_max_vertices,
-            false, p->cp);
+            false);
 
       if (session->hw_ctx_changed) {
          int i;
@@ -599,8 +599,8 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
           *      0xFFFFFFFF in order to not cause overflow in that SVBI."
           */
          for (i = 1; i < 4; i++) {
-            gen6_emit_3DSTATE_GS_SVB_INDEX(p->dev,
-                  i, 0, 0xffffffff, false, p->cp);
+            gen6_3DSTATE_GS_SVB_INDEX(&p->cp->builder,
+                  i, 0, 0xffffffff, false);
          }
       }
    }
@@ -630,8 +630,8 @@ gen6_pipeline_clip(struct ilo_3d_pipeline *p,
          }
       }
 
-      gen6_emit_3DSTATE_CLIP(p->dev, ilo->rasterizer,
-            ilo->fs, enable_guardband, 1, p->cp);
+      gen6_3DSTATE_CLIP(&p->cp->builder, ilo->rasterizer,
+            ilo->fs, enable_guardband, 1);
    }
 }
 
@@ -642,7 +642,7 @@ gen6_pipeline_sf(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_SF */
    if (DIRTY(RASTERIZER) || DIRTY(FS))
-      gen6_emit_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs, p->cp);
+      gen6_3DSTATE_SF(&p->cp->builder, ilo->rasterizer, ilo->fs);
 }
 
 void
@@ -655,8 +655,8 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
-            ilo->fb.state.width, ilo->fb.state.height, p->cp);
+      gen6_3DSTATE_DRAWING_RECTANGLE(&p->cp->builder, 0, 0,
+            ilo->fb.state.width, ilo->fb.state.height);
    }
 }
 
@@ -667,10 +667,10 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
 {
    /* 3DSTATE_CONSTANT_PS */
    if (session->pcb_state_fs_changed) {
-      gen6_emit_3DSTATE_CONSTANT_PS(p->dev,
+      gen6_3DSTATE_CONSTANT_PS(&p->cp->builder,
             &p->state.wm.PUSH_CONSTANT_BUFFER,
             &p->state.wm.PUSH_CONSTANT_BUFFER_size,
-            1, p->cp);
+            1);
    }
 
    /* 3DSTATE_WM */
@@ -684,8 +684,8 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed)
          gen6_wa_pipe_control_wm_max_threads_stall(p);
 
-      gen6_emit_3DSTATE_WM(p->dev, ilo->fs, num_samplers,
-            ilo->rasterizer, dual_blend, cc_may_kill, 0, p->cp);
+      gen6_3DSTATE_WM(&p->cp->builder, ilo->fs, num_samplers,
+            ilo->rasterizer, dual_blend, cc_may_kill, 0);
    }
 }
 
@@ -706,12 +706,12 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
          gen6_wa_pipe_control_wm_multisample_flush(p);
       }
 
-      gen6_emit_3DSTATE_MULTISAMPLE(p->dev,
+      gen6_3DSTATE_MULTISAMPLE(&p->cp->builder,
             ilo->fb.num_samples, packed_sample_pos,
-            ilo->rasterizer->state.half_pixel_center, p->cp);
+            ilo->rasterizer->state.half_pixel_center);
 
-      gen6_emit_3DSTATE_SAMPLE_MASK(p->dev,
-            (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1, p->cp);
+      gen6_3DSTATE_SAMPLE_MASK(&p->cp->builder,
+            (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1);
    }
 }
 
@@ -747,10 +747,10 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
          gen6_wa_pipe_control_wm_depth_flush(p);
       }
 
-      gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
-      gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
-      gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
-      gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev, clear_params, p->cp);
+      gen6_3DSTATE_DEPTH_BUFFER(&p->cp->builder, zs);
+      gen6_3DSTATE_HIER_DEPTH_BUFFER(&p->cp->builder, zs);
+      gen6_3DSTATE_STENCIL_BUFFER(&p->cp->builder, zs);
+      gen6_3DSTATE_CLEAR_PARAMS(&p->cp->builder, clear_params);
    }
 }
 
@@ -765,10 +765,10 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(p->dev,
-            &ilo->poly_stipple, p->cp);
+      gen6_3DSTATE_POLY_STIPPLE_PATTERN(&p->cp->builder,
+            &ilo->poly_stipple);
 
-      gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp);
+      gen6_3DSTATE_POLY_STIPPLE_OFFSET(&p->cp->builder, 0, 0);
    }
 
    /* 3DSTATE_LINE_STIPPLE */
@@ -776,9 +776,9 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_3DSTATE_LINE_STIPPLE(p->dev,
+      gen6_3DSTATE_LINE_STIPPLE(&p->cp->builder,
             ilo->rasterizer->state.line_stipple_pattern,
-            ilo->rasterizer->state.line_stipple_factor + 1, p->cp);
+            ilo->rasterizer->state.line_stipple_factor + 1);
    }
 
    /* 3DSTATE_AA_LINE_PARAMETERS */
@@ -786,7 +786,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
       if (p->dev->gen == ILO_GEN(6))
          gen6_wa_pipe_control_post_sync(p, false);
 
-      gen6_emit_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp);
+      gen6_3DSTATE_AA_LINE_PARAMETERS(&p->cp->builder);
    }
 }
 
@@ -1454,7 +1454,7 @@ ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
    if (p->dev->gen == ILO_GEN(6))
       gen6_wa_pipe_control_post_sync(p, false);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
          GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
          GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
@@ -1462,7 +1462,7 @@ ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
          GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
          GEN6_PIPE_CONTROL_WRITE_NONE |
          GEN6_PIPE_CONTROL_CS_STALL,
-         0, 0, false, p->cp);
+         0, 0, false);
 }
 
 void
@@ -1472,10 +1472,10 @@ ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p,
    if (p->dev->gen == ILO_GEN(6))
       gen6_wa_pipe_control_post_sync(p, true);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_WRITE_TIMESTAMP,
          bo, index * sizeof(uint64_t),
-         true, p->cp);
+         true);
 }
 
 void
@@ -1485,11 +1485,11 @@ ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
    if (p->dev->gen == ILO_GEN(6))
       gen6_wa_pipe_control_post_sync(p, false);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL |
          GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
          bo, index * sizeof(uint64_t),
-         true, p->cp);
+         true);
 }
 
 void
@@ -1518,15 +1518,15 @@ ilo_3d_pipeline_emit_write_statistics_gen6(struct ilo_3d_pipeline *p,
 
       if (regs[i]) {
          /* store lower 32 bits */
-         gen6_emit_MI_STORE_REGISTER_MEM(p->dev,
-               bo, bo_offset, regs[i], p->cp);
+         gen6_MI_STORE_REGISTER_MEM(&p->cp->builder,
+               bo, bo_offset, regs[i]);
          /* store higher 32 bits */
-         gen6_emit_MI_STORE_REGISTER_MEM(p->dev,
-               bo, bo_offset + 4, regs[i] + 4, p->cp);
+         gen6_MI_STORE_REGISTER_MEM(&p->cp->builder,
+               bo, bo_offset + 4, regs[i] + 4);
       }
       else {
-         gen6_emit_MI_STORE_DATA_IMM(p->dev,
-               bo, bo_offset, 0, true, p->cp);
+         gen6_MI_STORE_DATA_IMM(&p->cp->builder,
+               bo, bo_offset, 0, true);
       }
    }
 }
@@ -1536,16 +1536,16 @@ gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
                        const struct ilo_blitter *blitter,
                        struct gen6_rectlist_session *session)
 {
-   gen6_emit_3DSTATE_CONSTANT_VS(p->dev, NULL, NULL, 0, p->cp);
-   gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp);
+   gen6_3DSTATE_CONSTANT_VS(&p->cp->builder, NULL, NULL, 0);
+   gen6_3DSTATE_VS(&p->cp->builder, NULL, 0);
 
    gen6_wa_pipe_control_vs_const_flush(p);
 
-   gen6_emit_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);
-   gen6_emit_3DSTATE_GS(p->dev, NULL, NULL, 0, p->cp);
+   gen6_3DSTATE_CONSTANT_GS(&p->cp->builder, NULL, NULL, 0);
+   gen6_3DSTATE_GS(&p->cp->builder, NULL, NULL, 0);
 
-   gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp);
-   gen6_emit_3DSTATE_SF(p->dev, NULL, NULL, p->cp);
+   gen6_3DSTATE_CLIP(&p->cp->builder, NULL, NULL, false, 0);
+   gen6_3DSTATE_SF(&p->cp->builder, NULL, NULL);
 }
 
 static void
@@ -1570,10 +1570,10 @@ gen6_rectlist_wm(struct ilo_3d_pipeline *p,
       break;
    }
 
-   gen6_emit_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
+   gen6_3DSTATE_CONSTANT_PS(&p->cp->builder, NULL, NULL, 0);
 
    gen6_wa_pipe_control_wm_max_threads_stall(p);
-   gen6_emit_3DSTATE_WM(p->dev, NULL, 0, NULL, false, false, hiz_op, p->cp);
+   gen6_3DSTATE_WM(&p->cp->builder, NULL, 0, NULL, false, false, hiz_op);
 }
 
 static void
@@ -1585,22 +1585,22 @@ gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p,
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
                         ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_DEPTH_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
-      gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_HIER_DEPTH_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
-      gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_STENCIL_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
-   gen6_emit_3DSTATE_CLEAR_PARAMS(p->dev,
-         blitter->depth_clear_value, p->cp);
+   gen6_3DSTATE_CLEAR_PARAMS(&p->cp->builder,
+         blitter->depth_clear_value);
 }
 
 static void
@@ -1613,11 +1613,11 @@ gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
 
    gen6_wa_pipe_control_wm_multisample_flush(p);
 
-   gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples,
-         packed_sample_pos, true, p->cp);
+   gen6_3DSTATE_MULTISAMPLE(&p->cp->builder, blitter->fb.num_samples,
+         packed_sample_pos, true);
 
-   gen6_emit_3DSTATE_SAMPLE_MASK(p->dev,
-         (1 << blitter->fb.num_samples) - 1, p->cp);
+   gen6_3DSTATE_SAMPLE_MASK(&p->cp->builder,
+         (1 << blitter->fb.num_samples) - 1);
 }
 
 static void
@@ -1631,14 +1631,14 @@ gen6_rectlist_commands(struct ilo_3d_pipeline *p,
 
    ilo_builder_batch_state_base_address(&p->cp->builder, true);
 
-   gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev,
-         &blitter->ve, &blitter->vb, p->cp);
+   gen6_3DSTATE_VERTEX_BUFFERS(&p->cp->builder,
+         &blitter->ve, &blitter->vb);
 
-   gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev,
-         &blitter->ve, false, false, p->cp);
+   gen6_3DSTATE_VERTEX_ELEMENTS(&p->cp->builder,
+         &blitter->ve, false, false);
 
-   gen6_emit_3DSTATE_URB(p->dev,
-         p->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0, p->cp);
+   gen6_3DSTATE_URB(&p->cp->builder,
+         p->dev->urb_size, 0, blitter->ve.count * 4 * sizeof(float), 0);
    /* 3DSTATE_URB workaround */
    if (p->state.gs.active) {
       ilo_3d_pipeline_emit_flush_gen6(p);
@@ -1647,24 +1647,24 @@ gen6_rectlist_commands(struct ilo_3d_pipeline *p,
 
    if (blitter->uses &
        (ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_CC)) {
-      gen6_emit_3DSTATE_CC_STATE_POINTERS(p->dev, 0,
-            session->DEPTH_STENCIL_STATE, session->COLOR_CALC_STATE, p->cp);
+      gen6_3DSTATE_CC_STATE_POINTERS(&p->cp->builder, 0,
+            session->DEPTH_STENCIL_STATE, session->COLOR_CALC_STATE);
    }
 
    gen6_rectlist_vs_to_sf(p, blitter, session);
    gen6_rectlist_wm(p, blitter, session);
 
    if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
-      gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
-            0, 0, session->CC_VIEWPORT, p->cp);
+      gen6_3DSTATE_VIEWPORT_STATE_POINTERS(&p->cp->builder,
+            0, 0, session->CC_VIEWPORT);
    }
 
    gen6_rectlist_wm_depth(p, blitter, session);
 
-   gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
-         blitter->fb.width, blitter->fb.height, p->cp);
+   gen6_3DSTATE_DRAWING_RECTANGLE(&p->cp->builder, 0, 0,
+         blitter->fb.width, blitter->fb.height);
 
-   gen6_emit_3DPRIMITIVE(p->dev, &blitter->draw, NULL, true, p->cp);
+   gen6_3DPRIMITIVE(&p->cp->builder, &blitter->draw, NULL, true);
 }
 
 static void
index e1be159c1cb5d5acb38f227b8987a12e5e85095c..e9f98fdeea1e7cd6494340cc41fe9d8e4b8038ed 100644 (file)
@@ -88,7 +88,7 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
       bo = p->workaround_bo;
    }
 
-   gen6_emit_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp);
+   gen6_PIPE_CONTROL(&p->cp->builder, dw1, bo, 0, false);
 }
 
 static void
@@ -105,10 +105,10 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
     *      3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
     *      needs to be sent before any combination of VS associated 3DSTATE."
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL |
          GEN6_PIPE_CONTROL_WRITE_IMM,
-         p->workaround_bo, 0, false, p->cp);
+         p->workaround_bo, 0, false);
 }
 
 static void
@@ -144,20 +144,20 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
     *      guarantee that the pipeline from WM onwards is already flushed
     *      (e.g., via a preceding MI_FLUSH)."
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
    if (!change_depth_buffer)
       return;
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 }
 
 static void
@@ -172,9 +172,9 @@ gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
     *      between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
     *      Pixel Scoreboard set is required to be issued."
     */
-   gen6_emit_PIPE_CONTROL(p->dev,
+   gen6_PIPE_CONTROL(&p->cp->builder,
          GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false, p->cp);
+         NULL, 0, false);
 
 }
 
@@ -326,7 +326,7 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p,
    if (emit_3dstate_vs) {
       const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;
 
-      gen6_emit_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
+      gen6_3DSTATE_VS(&p->cp->builder, ilo->vs, num_samplers);
    }
 }
 
@@ -513,8 +513,8 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
    if (session->scissor_state_changed) {
-      gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
-            p->state.SCISSOR_RECT, p->cp);
+      gen6_3DSTATE_SCISSOR_STATE_POINTERS(&p->cp->builder,
+            p->state.SCISSOR_RECT);
    }
 
    /* XXX what is the best way to know if this workaround is needed? */
@@ -557,9 +557,9 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
          clear_params = 0;
       }
 
-      gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
-      gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
-      gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
+      gen6_3DSTATE_DEPTH_BUFFER(&p->cp->builder, zs);
+      gen6_3DSTATE_HIER_DEPTH_BUFFER(&p->cp->builder, zs);
+      gen6_3DSTATE_STENCIL_BUFFER(&p->cp->builder, zs);
       gen7_3DSTATE_CLEAR_PARAMS(&p->cp->builder, clear_params);
    }
 }
@@ -580,9 +580,9 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
          (ilo->fb.num_samples > 1) ? &p->packed_sample_position_4x :
          &p->packed_sample_position_1x;
 
-      gen6_emit_3DSTATE_MULTISAMPLE(p->dev,
+      gen6_3DSTATE_MULTISAMPLE(&p->cp->builder,
             ilo->fb.num_samples, packed_sample_pos,
-            ilo->rasterizer->state.half_pixel_center, p->cp);
+            ilo->rasterizer->state.half_pixel_center);
 
       gen7_3DSTATE_SAMPLE_MASK(&p->cp->builder,
             (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1,
@@ -694,7 +694,7 @@ gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
                        struct gen6_rectlist_session *session)
 {
    gen7_3DSTATE_CONSTANT_VS(&p->cp->builder, NULL, NULL, 0);
-   gen6_emit_3DSTATE_VS(p->dev, NULL, 0, p->cp);
+   gen6_3DSTATE_VS(&p->cp->builder, NULL, 0);
 
    gen7_3DSTATE_CONSTANT_HS(&p->cp->builder, NULL, NULL, 0);
    gen7_3DSTATE_HS(&p->cp->builder, NULL, 0);
@@ -709,7 +709,7 @@ gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
 
    gen7_3DSTATE_STREAMOUT(&p->cp->builder, 0x0, 0, false);
 
-   gen6_emit_3DSTATE_CLIP(p->dev, NULL, NULL, false, 0, p->cp);
+   gen6_3DSTATE_CLIP(&p->cp->builder, NULL, NULL, false, 0);
 
    gen7_wa_pipe_control_cs_stall(p, true, true);
 
@@ -756,18 +756,18 @@ gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p,
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
                         ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_emit_3DSTATE_DEPTH_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_DEPTH_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
-      gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_HIER_DEPTH_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
-      gen6_emit_3DSTATE_STENCIL_BUFFER(p->dev,
-            &blitter->fb.dst.u.zs, p->cp);
+      gen6_3DSTATE_STENCIL_BUFFER(&p->cp->builder,
+            &blitter->fb.dst.u.zs);
    }
 
    gen7_3DSTATE_CLEAR_PARAMS(&p->cp->builder,
@@ -786,8 +786,8 @@ gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
 
    gen7_wa_pipe_control_cs_stall(p, true, true);
 
-   gen6_emit_3DSTATE_MULTISAMPLE(p->dev, blitter->fb.num_samples,
-         packed_sample_pos, true, p->cp);
+   gen6_3DSTATE_MULTISAMPLE(&p->cp->builder, blitter->fb.num_samples,
+         packed_sample_pos, true);
 
    gen7_3DSTATE_SAMPLE_MASK(&p->cp->builder,
          (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
@@ -802,11 +802,11 @@ gen7_rectlist_commands(struct ilo_3d_pipeline *p,
 
    ilo_builder_batch_state_base_address(&p->cp->builder, true);
 
-   gen6_emit_3DSTATE_VERTEX_BUFFERS(p->dev,
-         &blitter->ve, &blitter->vb, p->cp);
+   gen6_3DSTATE_VERTEX_BUFFERS(&p->cp->builder,
+         &blitter->ve, &blitter->vb);
 
-   gen6_emit_3DSTATE_VERTEX_ELEMENTS(p->dev,
-         &blitter->ve, false, false, p->cp);
+   gen6_3DSTATE_VERTEX_ELEMENTS(&p->cp->builder,
+         &blitter->ve, false, false);
 
    gen7_rectlist_pcb_alloc(p, blitter, session);
 
@@ -835,8 +835,8 @@ gen7_rectlist_commands(struct ilo_3d_pipeline *p,
 
    gen7_rectlist_wm_depth(p, blitter, session);
 
-   gen6_emit_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
-         blitter->fb.width, blitter->fb.height, p->cp);
+   gen6_3DSTATE_DRAWING_RECTANGLE(&p->cp->builder, 0, 0,
+         blitter->fb.width, blitter->fb.height);
 
    gen7_3DPRIMITIVE(&p->cp->builder, &blitter->draw, NULL, true);
 }
index 406e667c43028b8a7394b3a7628ed59629eb1e52..737454cd716534319ce86f34a8b77f38e7ad63c7 100644 (file)
@@ -239,240 +239,249 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
 }
 
 static inline void
-gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info *dev,
-                            struct intel_bo *bo, uint32_t bo_offset,
-                            uint64_t val, bool store_qword,
-                            struct ilo_cp *cp)
+gen6_MI_STORE_DATA_IMM(struct ilo_builder *builder,
+                       struct intel_bo *bo, uint32_t bo_offset,
+                       uint64_t val, bool store_qword)
 {
    const uint8_t cmd_len = (store_qword) ? 5 : 4;
    uint32_t dw0 = GEN6_MI_CMD(MI_STORE_DATA_IMM) | (cmd_len - 2);
    uint32_t reloc_flags = INTEL_RELOC_WRITE;
+   unsigned pos;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    assert(bo_offset % ((store_qword) ? 8 : 4) == 0);
 
    /* must use GGTT on GEN6 as in PIPE_CONTROL */
-   if (dev->gen == ILO_GEN(6)) {
+   if (builder->dev->gen == ILO_GEN(6)) {
       dw0 |= GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT;
       reloc_flags |= INTEL_RELOC_GGTT;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
-   ilo_cp_write(cp, (uint32_t) val);
-
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = 0;
+   dw[3] = (uint32_t) val;
    if (store_qword)
-      ilo_cp_write(cp, (uint32_t) (val >> 32));
+      dw[4] = (uint32_t) (val >> 32);
    else
       assert(val == (uint64_t) ((uint32_t) val));
 
-   ilo_cp_end(cp);
+   ilo_builder_batch_reloc(builder, pos + 2,
+         bo, bo_offset, reloc_flags);
 }
 
 static inline void
-gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info *dev,
-                               uint32_t reg, uint32_t val,
-                               struct ilo_cp *cp)
+gen6_MI_LOAD_REGISTER_IMM(struct ilo_builder *builder,
+                          uint32_t reg, uint32_t val)
 {
    const uint8_t cmd_len = 3;
    const uint32_t dw0 = GEN6_MI_CMD(MI_LOAD_REGISTER_IMM) | (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    assert(reg % 4 == 0);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, reg);
-   ilo_cp_write(cp, val);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = reg;
+   dw[2] = val;
 }
 
 static inline void
-gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info *dev,
-                                struct intel_bo *bo, uint32_t bo_offset,
-                                uint32_t reg, struct ilo_cp *cp)
+gen6_MI_STORE_REGISTER_MEM(struct ilo_builder *builder,
+                           struct intel_bo *bo, uint32_t bo_offset,
+                           uint32_t reg)
 {
    const uint8_t cmd_len = 3;
    uint32_t dw0 = GEN6_MI_CMD(MI_STORE_REGISTER_MEM) | (cmd_len - 2);
    uint32_t reloc_flags = INTEL_RELOC_WRITE;
+   unsigned pos;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    assert(reg % 4 == 0 && bo_offset % 4 == 0);
 
    /* must use GGTT on GEN6 as in PIPE_CONTROL */
-   if (dev->gen == ILO_GEN(6)) {
+   if (builder->dev->gen == ILO_GEN(6)) {
       dw0 |= GEN6_MI_STORE_REGISTER_MEM_DW0_USE_GGTT;
       reloc_flags |= INTEL_RELOC_GGTT;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, reg);
-   ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
-   ilo_cp_end(cp);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = reg;
+
+   ilo_builder_batch_reloc(builder, pos + 2,
+         bo, bo_offset, reloc_flags);
 }
 
 static inline void
-gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info *dev,
-                               struct intel_bo *bo, uint32_t bo_offset,
-                               uint32_t report_id, struct ilo_cp *cp)
+gen6_MI_REPORT_PERF_COUNT(struct ilo_builder *builder,
+                          struct intel_bo *bo, uint32_t bo_offset,
+                          uint32_t report_id)
 {
    const uint8_t cmd_len = 3;
    const uint32_t dw0 = GEN6_MI_CMD(MI_REPORT_PERF_COUNT) | (cmd_len - 2);
    uint32_t reloc_flags = INTEL_RELOC_WRITE;
+   unsigned pos;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    assert(bo_offset % 64 == 0);
 
    /* must use GGTT on GEN6 as in PIPE_CONTROL */
-   if (dev->gen == ILO_GEN(6)) {
+   if (builder->dev->gen == ILO_GEN(6)) {
       bo_offset |= GEN6_MI_REPORT_PERF_COUNT_DW1_USE_GGTT;
       reloc_flags |= INTEL_RELOC_GGTT;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
-   ilo_cp_write(cp, report_id);
-   ilo_cp_end(cp);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[2] = report_id;
+
+   ilo_builder_batch_reloc(builder, pos + 1,
+         bo, bo_offset, reloc_flags);
 }
 
 static inline void
-gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
-                             struct intel_bo *general_state_bo,
-                             struct intel_bo *surface_state_bo,
-                             struct intel_bo *dynamic_state_bo,
-                             struct intel_bo *indirect_object_bo,
-                             struct intel_bo *instruction_bo,
-                             uint32_t general_state_size,
-                             uint32_t dynamic_state_size,
-                             uint32_t indirect_object_size,
-                             uint32_t instruction_size,
-                             struct ilo_cp *cp)
+gen6_STATE_BASE_ADDRESS(struct ilo_builder *builder,
+                        struct intel_bo *general_state_bo,
+                        struct intel_bo *surface_state_bo,
+                        struct intel_bo *dynamic_state_bo,
+                        struct intel_bo *indirect_object_bo,
+                        struct intel_bo *instruction_bo,
+                        uint32_t general_state_size,
+                        uint32_t dynamic_state_size,
+                        uint32_t indirect_object_size,
+                        uint32_t instruction_size)
 {
    const uint8_t cmd_len = 10;
    const uint32_t dw0 = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) |
                         (cmd_len - 2);
+   unsigned pos;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    /* 4K-page aligned */
    assert(((general_state_size | dynamic_state_size |
             indirect_object_size | instruction_size) & 0xfff) == 0);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
 
-   ilo_cp_write_bo(cp, 1, general_state_bo, 0);
-   ilo_cp_write_bo(cp, 1, surface_state_bo, 0);
-   ilo_cp_write_bo(cp, 1, dynamic_state_bo, 0);
-   ilo_cp_write_bo(cp, 1, indirect_object_bo, 0);
-   ilo_cp_write_bo(cp, 1, instruction_bo, 0);
+   dw[1] = 1;
+   dw[2] = 1;
+   dw[3] = 1;
+   dw[4] = 1;
+   dw[5] = 1;
 
-   if (general_state_size) {
-      ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, 0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 1);
-   }
+   /* skip range checks */
+   dw[6] = 1;
+   dw[7] = 0xfffff000 + 1;
+   dw[8] = 0xfffff000 + 1;
+   dw[9] = 1;
 
-   if (dynamic_state_size) {
-      ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, 0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 0xfffff000 + 1);
-   }
+   if (general_state_bo) {
+      ilo_builder_batch_reloc(builder, pos + 1, general_state_bo, 1, 0);
 
-   if (indirect_object_size) {
-      ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, 0);
-   }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 0xfffff000 + 1);
+      if (general_state_size) {
+         ilo_builder_batch_reloc(builder, pos + 6, general_state_bo,
+               general_state_size | 1, 0);
+      }
    }
 
-   if (instruction_size) {
-      ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo, 0);
+   if (surface_state_bo)
+      ilo_builder_batch_reloc(builder, pos + 2, surface_state_bo, 1, 0);
+
+   if (dynamic_state_bo) {
+      ilo_builder_batch_reloc(builder, pos + 3, dynamic_state_bo, 1, 0);
+
+      if (dynamic_state_size) {
+         ilo_builder_batch_reloc(builder, pos + 7, dynamic_state_bo,
+               dynamic_state_size | 1, 0);
+      }
    }
-   else {
-      /* skip range check */
-      ilo_cp_write(cp, 1);
+
+   if (indirect_object_bo) {
+      ilo_builder_batch_reloc(builder, pos + 4, indirect_object_bo, 1, 0);
+
+      if (indirect_object_size) {
+         ilo_builder_batch_reloc(builder, pos + 8, indirect_object_bo,
+               indirect_object_size | 1, 0);
+      }
    }
 
-   ilo_cp_end(cp);
+   if (instruction_bo) {
+      ilo_builder_batch_reloc(builder, pos + 5, instruction_bo, 1, 0);
+
+      if (instruction_size) {
+         ilo_builder_batch_reloc(builder, pos + 9, instruction_bo,
+               instruction_size | 1, 0);
+      }
+   }
 }
 
 static inline void
-gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
-                    uint32_t sip,
-                    struct ilo_cp *cp)
+gen6_STATE_SIP(struct ilo_builder *builder,
+               uint32_t sip)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(COMMON, STATE_SIP) | (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, sip);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = sip;
 }
 
 static inline void
-gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
-                                bool enable,
-                                struct ilo_cp *cp)
+gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
+                           bool enable)
 {
    const uint8_t cmd_len = 1;
    const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
                         enable;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_end(cp);
+   ilo_builder_batch_write(builder, cmd_len, &dw0);
 }
 
 static inline void
-gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
-                          int pipeline,
-                          struct ilo_cp *cp)
+gen6_PIPELINE_SELECT(struct ilo_builder *builder,
+                     int pipeline)
 {
    const uint8_t cmd_len = 1;
    const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, PIPELINE_SELECT) |
                         pipeline;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    /* 3D or media */
    assert(pipeline == 0x0 || pipeline == 0x1);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_end(cp);
+   ilo_builder_batch_write(builder, cmd_len, &dw0);
 }
 
 static inline void
-gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
-                          int max_threads, int num_urb_entries,
-                          int urb_entry_size,
-                          struct ilo_cp *cp)
+gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
+                     int max_threads, int num_urb_entries,
+                     int urb_entry_size)
 {
    const uint8_t cmd_len = 8;
    const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) |
                         (cmd_len - 2);
-   uint32_t dw2, dw4;
+   uint32_t dw2, dw4, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    dw2 = (max_threads - 1) << 16 |
          num_urb_entries << 8 |
@@ -482,120 +491,107 @@ gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
    dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
          480;                    /* CURBE Allocation Size */
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* MBZ */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, 0); /* scoreboard */
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = 0; /* scratch */
+   dw[2] = dw2;
+   dw[3] = 0; /* MBZ */
+   dw[4] = dw4;
+   dw[5] = 0; /* scoreboard */
+   dw[6] = 0;
+   dw[7] = 0;
 }
 
 static inline void
-gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
-                          uint32_t buf, int size,
-                          struct ilo_cp *cp)
+gen6_MEDIA_CURBE_LOAD(struct ilo_builder *builder,
+                     uint32_t buf, int size)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    assert(buf % 32 == 0);
    /* gen6_push_constant_buffer() allocates buffers in 256-bit units */
    size = align(size, 32);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, 0); /* MBZ */
-   ilo_cp_write(cp, size);
-   ilo_cp_write(cp, buf);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = 0; /* MBZ */
+   dw[2] = size;
+   dw[3] = buf;
 }
 
 static inline void
-gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
-                                          uint32_t offset, int num_ids,
-                                          struct ilo_cp *cp)
+gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(struct ilo_builder *builder,
+                                     uint32_t offset, int num_ids)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 =
       GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) | (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    assert(offset % 32 == 0);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, 0); /* MBZ */
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = 0; /* MBZ */
    /* every ID has 8 DWords */
-   ilo_cp_write(cp, num_ids * 8 * 4);
-   ilo_cp_write(cp, offset);
-   ilo_cp_end(cp);
+   dw[2] = num_ids * 8 * 4;
+   dw[3] = offset;
 }
 
 static inline void
-gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
-                              int id, int byte, int thread_count,
-                              struct ilo_cp *cp)
+gen6_MEDIA_GATEWAY_STATE(struct ilo_builder *builder,
+                         int id, int byte, int thread_count)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_GATEWAY_STATE) |
                         (cmd_len - 2);
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   uint32_t *dw;
 
-   dw1 = id << 16 |
-         byte << 8 |
-         thread_count;
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = id << 16 |
+           byte << 8 |
+           thread_count;
 }
 
 static inline void
-gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
-                            int thread_count_water_mark,
-                            int barrier_mask,
-                            struct ilo_cp *cp)
+gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder,
+                       int thread_count_water_mark,
+                       int barrier_mask)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) |
                         (cmd_len - 2);
-   uint32_t dw1;
-
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   uint32_t *dw;
 
-   dw1 = thread_count_water_mark << 16 |
-         barrier_mask;
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = thread_count_water_mark << 16 |
+           barrier_mask;
 }
 
 static inline void
-gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
-                              struct ilo_cp *cp)
+gen6_MEDIA_OBJECT_WALKER(struct ilo_builder *builder)
 {
    assert(!"MEDIA_OBJECT_WALKER unsupported");
 }
 
 static inline void
-gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t vs_binding_table,
-                                         uint32_t gs_binding_table,
-                                         uint32_t ps_binding_table,
-                                         struct ilo_cp *cp)
+gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
+                                    uint32_t vs_binding_table,
+                                    uint32_t gs_binding_table,
+                                    uint32_t ps_binding_table)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
@@ -603,23 +599,22 @@ gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
                         GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
                         GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, vs_binding_table);
-   ilo_cp_write(cp, gs_binding_table);
-   ilo_cp_write(cp, ps_binding_table);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = vs_binding_table;
+   dw[2] = gs_binding_table;
+   dw[3] = ps_binding_table;
 }
 
 static inline void
-gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t vs_sampler_state,
-                                         uint32_t gs_sampler_state,
-                                         uint32_t ps_sampler_state,
-                                         struct ilo_cp *cp)
+gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
+                                    uint32_t vs_sampler_state,
+                                    uint32_t gs_sampler_state,
+                                    uint32_t ps_sampler_state)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
@@ -627,30 +622,30 @@ gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
                         GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
                         GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, vs_sampler_state);
-   ilo_cp_write(cp, gs_sampler_state);
-   ilo_cp_write(cp, ps_sampler_state);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = vs_sampler_state;
+   dw[2] = gs_sampler_state;
+   dw[3] = ps_sampler_state;
 }
 
 static inline void
-gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
-                      int vs_total_size, int gs_total_size,
-                      int vs_entry_size, int gs_entry_size,
-                      struct ilo_cp *cp)
+gen6_3DSTATE_URB(struct ilo_builder *builder,
+                 int vs_total_size, int gs_total_size,
+                 int vs_entry_size, int gs_entry_size)
 {
    const uint8_t cmd_len = 3;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
    const int row_size = 128; /* 1024 bits */
    int vs_alloc_size, gs_alloc_size;
    int vs_num_entries, gs_num_entries;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    /* in 1024-bit URB rows */
    vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
@@ -674,26 +669,24 @@ gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
    if (gs_num_entries > 256)
       gs_num_entries = 256;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
-                    vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT);
-   ilo_cp_write(cp, gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
-                    (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
+           vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
+   dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
+           (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
 }
 
 static inline void
-gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
-                                 const struct ilo_ve_state *ve,
-                                 const struct ilo_vb_state *vb,
-                                 struct ilo_cp *cp)
+gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
+                            const struct ilo_ve_state *ve,
+                            const struct ilo_vb_state *vb)
 {
    uint8_t cmd_len;
-   uint32_t dw0;
-   unsigned hw_idx;
+   uint32_t dw0, *dw;
+   unsigned hw_idx, pos;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 82:
@@ -709,24 +702,25 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
    dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
 
+   dw++;
+   pos++;
    for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
       const unsigned instance_divisor = ve->instance_divisors[hw_idx];
       const unsigned pipe_idx = ve->vb_mapping[hw_idx];
       const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
-      uint32_t dw;
 
-      dw = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
+      dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
 
       if (instance_divisor)
-         dw |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
+         dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
       else
-         dw |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
+         dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
 
-      if (dev->gen >= ILO_GEN(7))
-         dw |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
+      if (builder->dev->gen >= ILO_GEN(7))
+         dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
 
       /* use null vb if there is no buffer or the stride is out of range */
       if (cso->buffer && cso->stride <= 2048) {
@@ -734,24 +728,21 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
          const uint32_t start_offset = cso->buffer_offset;
          const uint32_t end_offset = buf->bo_size - 1;
 
-         dw |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
-
-         ilo_cp_write(cp, dw);
-         ilo_cp_write_bo(cp, start_offset, buf->bo, 0);
-         ilo_cp_write_bo(cp, end_offset, buf->bo, 0);
-         ilo_cp_write(cp, instance_divisor);
+         dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
+         ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
+         ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
       }
       else {
-         dw |= 1 << 13;
-
-         ilo_cp_write(cp, dw);
-         ilo_cp_write(cp, 0);
-         ilo_cp_write(cp, 0);
-         ilo_cp_write(cp, instance_divisor);
+         dw[0] |= 1 << 13;
+         dw[1] = 0;
+         dw[2] = 0;
       }
-   }
 
-   ilo_cp_end(cp);
+      dw[3] = instance_divisor;
+
+      dw += 4;
+      pos += 4;
+   }
 }
 
 static inline void
@@ -821,17 +812,16 @@ ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
 }
 
 static inline void
-gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
-                                  const struct ilo_ve_state *ve,
-                                  bool last_velement_edgeflag,
-                                  bool prepend_generated_ids,
-                                  struct ilo_cp *cp)
+gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
+                             const struct ilo_ve_state *ve,
+                             bool last_velement_edgeflag,
+                             bool prepend_generated_ids)
 {
    uint8_t cmd_len;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
    unsigned i;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 93:
@@ -840,10 +830,12 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
     */
    assert(ve->count + prepend_generated_ids <= 34);
 
+   STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
+
    if (!ve->count && !prepend_generated_ids) {
       struct ilo_ve_cso dummy;
 
-      ve_init_cso_with_components(dev,
+      ve_init_cso_with_components(builder->dev,
             GEN6_VFCOMP_STORE_0,
             GEN6_VFCOMP_STORE_0,
             GEN6_VFCOMP_STORE_0,
@@ -854,10 +846,9 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
       dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
             (cmd_len - 2);
 
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, dw0);
-      ilo_cp_write_multi(cp, dummy.payload, 2);
-      ilo_cp_end(cp);
+      ilo_builder_batch_pointer(builder, cmd_len, &dw);
+      dw[0] = dw0;
+      memcpy(&dw[1], dummy.payload, sizeof(dummy.payload));
 
       return;
    }
@@ -866,59 +857,59 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
    dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw++;
 
    if (prepend_generated_ids) {
       struct ilo_ve_cso gen_ids;
 
-      ve_init_cso_with_components(dev,
+      ve_init_cso_with_components(builder->dev,
             GEN6_VFCOMP_STORE_VID,
             GEN6_VFCOMP_STORE_IID,
             GEN6_VFCOMP_NOSTORE,
             GEN6_VFCOMP_NOSTORE,
             &gen_ids);
 
-      ilo_cp_write_multi(cp, gen_ids.payload, 2);
+      memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload));
+      dw += 2;
    }
 
    if (last_velement_edgeflag) {
       struct ilo_ve_cso edgeflag;
 
       for (i = 0; i < ve->count - 1; i++)
-         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+         memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
 
       edgeflag = ve->cso[i];
-      ve_set_cso_edgeflag(dev, &edgeflag);
-      ilo_cp_write_multi(cp, edgeflag.payload, 2);
+      ve_set_cso_edgeflag(builder->dev, &edgeflag);
+      memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload));
    }
    else {
       for (i = 0; i < ve->count; i++)
-         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
+         memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload));
    }
-
-   ilo_cp_end(cp);
 }
 
 static inline void
-gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
-                               const struct ilo_ib_state *ib,
-                               bool enable_cut_index,
-                               struct ilo_cp *cp)
+gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
+                          const struct ilo_ib_state *ib,
+                          bool enable_cut_index)
 {
    const uint8_t cmd_len = 3;
    struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
    uint32_t start_offset, end_offset;
    int format;
-   uint32_t dw0;
+   unsigned pos;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    if (!buf)
       return;
 
    /* this is moved to the new 3DSTATE_VF */
-   if (dev->gen >= ILO_GEN(7.5))
+   if (builder->dev->gen >= ILO_GEN(7.5))
       assert(!enable_cut_index);
 
    switch (ib->hw_index_size) {
@@ -954,19 +945,17 @@ gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
    if (enable_cut_index)
       dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write_bo(cp, start_offset, buf->bo, 0);
-   ilo_cp_write_bo(cp, end_offset, buf->bo, 0);
-   ilo_cp_end(cp);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
+   ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
 }
 
 static inline void
-gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                          uint32_t clip_viewport,
-                                          uint32_t sf_viewport,
-                                          uint32_t cc_viewport,
-                                          struct ilo_cp *cp)
+gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder,
+                                     uint32_t clip_viewport,
+                                     uint32_t sf_viewport,
+                                     uint32_t cc_viewport)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
@@ -974,77 +963,74 @@ gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
                         GEN6_PTR_VP_DW0_SF_CHANGED |
                         GEN6_PTR_VP_DW0_CC_CHANGED |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, clip_viewport);
-   ilo_cp_write(cp, sf_viewport);
-   ilo_cp_write(cp, cc_viewport);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = clip_viewport;
+   dw[2] = sf_viewport;
+   dw[3] = cc_viewport;
 }
 
 static inline void
-gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                    uint32_t blend_state,
-                                    uint32_t depth_stencil_state,
-                                    uint32_t color_calc_state,
-                                    struct ilo_cp *cp)
+gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder,
+                               uint32_t blend_state,
+                               uint32_t depth_stencil_state,
+                               uint32_t color_calc_state)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, blend_state | 1);
-   ilo_cp_write(cp, depth_stencil_state | 1);
-   ilo_cp_write(cp, color_calc_state | 1);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = blend_state | 1;
+   dw[2] = depth_stencil_state | 1;
+   dw[3] = color_calc_state | 1;
 }
 
 static inline void
-gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
-                                         uint32_t scissor_rect,
-                                         struct ilo_cp *cp)
+gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder,
+                                    uint32_t scissor_rect)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, scissor_rect);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = scissor_rect;
 }
 
 static inline void
-gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *vs,
-                     int num_samplers,
-                     struct ilo_cp *cp)
+gen6_3DSTATE_VS(struct ilo_builder *builder,
+                const struct ilo_shader_state *vs,
+                int num_samplers)
 {
    const uint8_t cmd_len = 6;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
    const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5;
+   uint32_t dw2, dw4, dw5, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    if (!vs) {
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, dw0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
+      ilo_builder_batch_pointer(builder, cmd_len, &dw);
+      dw[0] = dw0;
+      dw[1] = 0;
+      dw[2] = 0;
+      dw[3] = 0;
+      dw[4] = 0;
+      dw[5] = 0;
+
       return;
    }
 
@@ -1055,28 +1041,26 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
 
    dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = ilo_shader_get_kernel_offset(vs);
+   dw[2] = dw2;
+   dw[3] = 0; /* scratch */
+   dw[4] = dw4;
+   dw[5] = dw5;
 }
 
 static inline void
-gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *gs,
-                     const struct ilo_shader_state *vs,
-                     int verts_per_prim,
-                     struct ilo_cp *cp)
+gen6_3DSTATE_GS(struct ilo_builder *builder,
+                const struct ilo_shader_state *gs,
+                const struct ilo_shader_state *vs,
+                int verts_per_prim)
 {
    const uint8_t cmd_len = 7;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   uint32_t dw1, dw2, dw4, dw5, dw6;
+   uint32_t dw1, dw2, dw4, dw5, dw6, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    if (gs) {
       const struct ilo_shader_cso *cso;
@@ -1109,7 +1093,7 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
          ilo_shader_get_kernel_param(vs, param);
 
       /* cannot use VS's CSO */
-      ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
+      ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso);
       dw2 = cso.payload[0];
       dw4 = cso.payload[1];
       dw5 = cso.payload[2];
@@ -1123,30 +1107,28 @@ gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
       dw6 = 0;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0);
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, dw6);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+   dw[2] = dw2;
+   dw[3] = 0;
+   dw[4] = dw4;
+   dw[5] = dw5;
+   dw[6] = dw6;
 }
 
 static inline void
-gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
-                       const struct ilo_rasterizer_state *rasterizer,
-                       const struct ilo_shader_state *fs,
-                       bool enable_guardband,
-                       int num_viewports,
-                       struct ilo_cp *cp)
+gen6_3DSTATE_CLIP(struct ilo_builder *builder,
+                  const struct ilo_rasterizer_state *rasterizer,
+                  const struct ilo_shader_state *fs,
+                  bool enable_guardband,
+                  int num_viewports)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
-   uint32_t dw1, dw2, dw3;
+   uint32_t dw1, dw2, dw3, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    if (rasterizer) {
       int interps;
@@ -1175,72 +1157,67 @@ gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
       dw3 = 0;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, dw3);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+   dw[2] = dw2;
+   dw[3] = dw3;
 }
 
 static inline void
-gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     const struct ilo_shader_state *fs,
-                     struct ilo_cp *cp)
+gen6_3DSTATE_SF(struct ilo_builder *builder,
+                const struct ilo_rasterizer_state *rasterizer,
+                const struct ilo_shader_state *fs)
 {
    const uint8_t cmd_len = 20;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
-   uint32_t payload_raster[6], payload_sbe[13];
+   uint32_t payload_raster[6], payload_sbe[13], *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
+   ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer,
          1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
-   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
+   ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer,
          fs, payload_sbe, Elements(payload_sbe));
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, payload_sbe[0]);
-   ilo_cp_write_multi(cp, payload_raster, 6);
-   ilo_cp_write_multi(cp, &payload_sbe[1], 12);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = payload_sbe[0];
+   memcpy(&dw[2], payload_raster, sizeof(payload_raster));
+   memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4);
 }
 
 static inline void
-gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
-                     const struct ilo_shader_state *fs,
-                     int num_samplers,
-                     const struct ilo_rasterizer_state *rasterizer,
-                     bool dual_blend, bool cc_may_kill,
-                     uint32_t hiz_op,
-                     struct ilo_cp *cp)
+gen6_3DSTATE_WM(struct ilo_builder *builder,
+                const struct ilo_shader_state *fs,
+                int num_samplers,
+                const struct ilo_rasterizer_state *rasterizer,
+                bool dual_blend, bool cc_may_kill,
+                uint32_t hiz_op)
 {
    const uint8_t cmd_len = 9;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
    const int num_samples = 1;
    const struct ilo_shader_cso *fs_cso;
-   uint32_t dw2, dw4, dw5, dw6;
+   uint32_t dw2, dw4, dw5, dw6, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    if (!fs) {
       /* see brwCreateContext() */
-      const int max_threads = (dev->gt == 2) ? 80 : 40;
-
-      ilo_cp_begin(cp, cmd_len);
-      ilo_cp_write(cp, dw0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, hiz_op);
+      const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
+
+      ilo_builder_batch_pointer(builder, cmd_len, &dw);
+      dw[0] = dw0;
+      dw[1] = 0;
+      dw[2] = 0;
+      dw[3] = 0;
+      dw[4] = hiz_op;
       /* honor the valid range even if dispatching is disabled */
-      ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_write(cp, 0);
-      ilo_cp_end(cp);
+      dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+      dw[6] = 0;
+      dw[7] = 0;
+      dw[8] = 0;
 
       return;
    }
@@ -1278,17 +1255,16 @@ gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
              rasterizer->wm.dw_msaa_disp;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
-   ilo_cp_write(cp, dw2);
-   ilo_cp_write(cp, 0); /* scratch */
-   ilo_cp_write(cp, dw4);
-   ilo_cp_write(cp, dw5);
-   ilo_cp_write(cp, dw6);
-   ilo_cp_write(cp, 0); /* kernel 1 */
-   ilo_cp_write(cp, 0); /* kernel 2 */
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = ilo_shader_get_kernel_offset(fs);
+   dw[2] = dw2;
+   dw[3] = 0; /* scratch */
+   dw[4] = dw4;
+   dw[5] = dw5;
+   dw[6] = dw6;
+   dw[7] = 0; /* kernel 1 */
+   dw[8] = 0; /* kernel 2 */
 }
 
 static inline unsigned
@@ -1327,16 +1303,15 @@ gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
 }
 
 static inline void
-gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
+gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
+                         const uint32_t *bufs, const int *sizes,
+                         int num_bufs)
 {
    const uint8_t cmd_len = 5;
    uint32_t buf_dw[4], buf_enabled;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
    assert(num_bufs <= 4);
 
    /*
@@ -1345,33 +1320,28 @@ gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
     *     "The sum of all four read length fields (each incremented to
     *      represent the actual read length) must be less than or equal to 32"
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
+   buf_enabled = gen6_fill_3dstate_constant(builder->dev,
          bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
 
    dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
          buf_enabled << 12 |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   memcpy(&dw[1], buf_dw, sizeof(buf_dw));
 }
 
 static inline void
-gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
+gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
+                         const uint32_t *bufs, const int *sizes,
+                         int num_bufs)
 {
    const uint8_t cmd_len = 5;
    uint32_t buf_dw[4], buf_enabled;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
    assert(num_bufs <= 4);
 
    /*
@@ -1380,33 +1350,28 @@ gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
     *     "The sum of all four read length fields (each incremented to
     *      represent the actual read length) must be less than or equal to 64"
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
+   buf_enabled = gen6_fill_3dstate_constant(builder->dev,
          bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
 
    dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
          buf_enabled << 12 |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   memcpy(&dw[1], buf_dw, sizeof(buf_dw));
 }
 
 static inline void
-gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
-                              const uint32_t *bufs, const int *sizes,
-                              int num_bufs,
-                              struct ilo_cp *cp)
+gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder,
+                         const uint32_t *bufs, const int *sizes,
+                         int num_bufs)
 {
    const uint8_t cmd_len = 5;
    uint32_t buf_dw[4], buf_enabled;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
    assert(num_bufs <= 4);
 
    /*
@@ -1415,47 +1380,41 @@ gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
     *     "The sum of all four read length fields (each incremented to
     *      represent the actual read length) must be less than or equal to 64"
     */
-   buf_enabled = gen6_fill_3dstate_constant(dev,
+   buf_enabled = gen6_fill_3dstate_constant(builder->dev,
          bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
 
    dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) |
          buf_enabled << 12 |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, buf_dw[0]);
-   ilo_cp_write(cp, buf_dw[1]);
-   ilo_cp_write(cp, buf_dw[2]);
-   ilo_cp_write(cp, buf_dw[3]);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   memcpy(&dw[1], buf_dw, sizeof(buf_dw));
 }
 
 static inline void
-gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
-                              unsigned sample_mask,
-                              struct ilo_cp *cp)
+gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
+                         unsigned sample_mask)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) |
                         (cmd_len - 2);
    const unsigned valid_mask = 0xf;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    sample_mask &= valid_mask;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, sample_mask);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = sample_mask;
 }
 
 static inline void
-gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
-                                    unsigned x, unsigned y,
-                                    unsigned width, unsigned height,
-                                    struct ilo_cp *cp)
+gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder,
+                               unsigned x, unsigned y,
+                               unsigned width, unsigned height)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) |
@@ -1463,10 +1422,11 @@ gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
    unsigned xmax = x + width - 1;
    unsigned ymax = y + height - 1;
    int rect_limit;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   if (dev->gen >= ILO_GEN(7)) {
+   if (builder->dev->gen >= ILO_GEN(7)) {
       rect_limit = 16383;
    }
    else {
@@ -1486,18 +1446,17 @@ gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
    if (xmax > rect_limit) xmax = rect_limit;
    if (ymax > rect_limit) ymax = rect_limit;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, y << 16 | x);
-   ilo_cp_write(cp, ymax << 16 | xmax);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = dw0;
+   dw[1] = y << 16 | x;
+   dw[2] = ymax << 16 | xmax;
 
    /*
     * There is no need to set the origin.  It is intended to support front
     * buffer rendering.
     */
-   ilo_cp_write(cp, 0);
-
-   ilo_cp_end(cp);
+   dw[3] = 0;
 }
 
 static inline void
@@ -1536,159 +1495,159 @@ zs_align_surface(const struct ilo_dev_info *dev,
 }
 
 static inline void
-gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                               const struct ilo_zs_surface *zs,
-                               struct ilo_cp *cp)
+gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
+                          const struct ilo_zs_surface *zs)
 {
    const uint8_t cmd_len = 7;
-   uint32_t dw0;
+   unsigned pos;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   dw0 = (dev->gen >= ILO_GEN(7)) ?
+   dw0 = (builder->dev->gen >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
       GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
    dw0 |= (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, zs->payload[0]);
-   ilo_cp_write_bo(cp, zs->payload[1], zs->bo, INTEL_RELOC_WRITE);
-   ilo_cp_write(cp, zs->payload[2]);
-   ilo_cp_write(cp, zs->payload[3]);
-   ilo_cp_write(cp, zs->payload[4]);
-   ilo_cp_write(cp, zs->payload[5]);
-   ilo_cp_end(cp);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = zs->payload[0];
+
+   if (zs->bo) {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->bo, zs->payload[1], INTEL_RELOC_WRITE);
+   } else {
+      dw[2] = 0;
+   }
+
+   dw[3] = zs->payload[2];
+   dw[4] = zs->payload[3];
+   dw[5] = zs->payload[4];
+   dw[6] = zs->payload[5];
 }
 
 static inline void
-gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
-                                      int x_offset, int y_offset,
-                                      struct ilo_cp *cp)
+gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
+                                 int x_offset, int y_offset)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
    assert(x_offset >= 0 && x_offset <= 31);
    assert(y_offset >= 0 && y_offset <= 31);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, x_offset << 8 | y_offset);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = x_offset << 8 | y_offset;
 }
 
 static inline void
-gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
-                                       const struct pipe_poly_stipple *pattern,
-                                       struct ilo_cp *cp)
+gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
+                                  const struct pipe_poly_stipple *pattern)
 {
    const uint8_t cmd_len = 33;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) |
                         (cmd_len - 2);
+   uint32_t *dw;
    int i;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
-   assert(Elements(pattern->stipple) == 32);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
+   STATIC_ASSERT(Elements(pattern->stipple) == 32);
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw++;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
    for (i = 0; i < 32; i++)
-      ilo_cp_write(cp, pattern->stipple[i]);
-   ilo_cp_end(cp);
+      dw[i] = pattern->stipple[i];
 }
 
 static inline void
-gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
-                               unsigned pattern, unsigned factor,
-                               struct ilo_cp *cp)
+gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
+                          unsigned pattern, unsigned factor)
 {
    const uint8_t cmd_len = 3;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) |
                         (cmd_len - 2);
+   uint32_t *dw;
    unsigned inverse;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
    assert((pattern & 0xffff) == pattern);
    assert(factor >= 1 && factor <= 256);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, pattern);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = pattern;
 
-   if (dev->gen >= ILO_GEN(7)) {
+   if (builder->dev->gen >= ILO_GEN(7)) {
       /* in U1.16 */
       inverse = (unsigned) (65536.0f / factor);
-      ilo_cp_write(cp, inverse << 15 | factor);
+      dw[2] = inverse << 15 | factor;
    }
    else {
       /* in U1.13 */
       inverse = (unsigned) (8192.0f / factor);
-      ilo_cp_write(cp, inverse << 16 | factor);
+      dw[2] = inverse << 16 | factor;
    }
-
-   ilo_cp_end(cp);
 }
 
 static inline void
-gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
-                                     struct ilo_cp *cp)
+gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
 {
    const uint8_t cmd_len = 3;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, 0 << 16 | 0);
-   ilo_cp_write(cp, 0 << 16 | 0);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = 0 << 16 | 0;
+   dw[2] = 0 << 16 | 0;
 }
 
 static inline void
-gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
-                               int index, unsigned svbi,
-                               unsigned max_svbi,
-                               bool load_vertex_count,
-                               struct ilo_cp *cp)
+gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
+                          int index, unsigned svbi,
+                          unsigned max_svbi,
+                          bool load_vertex_count)
 {
    const uint8_t cmd_len = 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) |
                         (cmd_len - 2);
-   uint32_t dw1;
+   uint32_t dw1, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
    assert(index >= 0 && index < 4);
 
    dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
    if (load_vertex_count)
       dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, svbi);
-   ilo_cp_write(cp, max_svbi);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+   dw[2] = svbi;
+   dw[3] = max_svbi;
 }
 
 static inline void
-gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
-                              int num_samples,
-                              const uint32_t *packed_sample_pos,
-                              bool pixel_location_center,
-                              struct ilo_cp *cp)
+gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
+                         int num_samples,
+                         const uint32_t *packed_sample_pos,
+                         bool pixel_location_center)
 {
-   const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
+   const uint8_t cmd_len = (builder->dev->gen >= ILO_GEN(7)) ? 4 : 3;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) |
                         (cmd_len - 2);
-   uint32_t dw1, dw2, dw3;
+   uint32_t dw1, dw2, dw3, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    dw1 = (pixel_location_center) ?
       GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
@@ -1706,7 +1665,7 @@ gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
       dw3 = 0;
       break;
    case 8:
-      assert(dev->gen >= ILO_GEN(7));
+      assert(builder->dev->gen >= ILO_GEN(7));
       dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
       dw2 = packed_sample_pos[0];
       dw3 = packed_sample_pos[1];
@@ -1719,91 +1678,100 @@ gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
       break;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write(cp, dw2);
-   if (dev->gen >= ILO_GEN(7))
-      ilo_cp_write(cp, dw3);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+   dw[2] = dw2;
+   if (builder->dev->gen >= ILO_GEN(7))
+      dw[3] = dw3; /* fourth dword exists only when cmd_len is 4 (Gen7+) */
 }
 
 static inline void
-gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
-                                 const struct ilo_zs_surface *zs,
-                                 struct ilo_cp *cp)
+gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
+                            const struct ilo_zs_surface *zs)
 {
    const uint8_t cmd_len = 3;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
+   unsigned pos;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   dw0 = (dev->gen >= ILO_GEN(7)) ?
+   dw0 = (builder->dev->gen >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
       GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
    dw0 |= (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
    /* see ilo_gpe_init_zs_surface() */
-   ilo_cp_write(cp, zs->payload[6]);
-   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, INTEL_RELOC_WRITE);
-   ilo_cp_end(cp);
+   dw[1] = zs->payload[6];
+
+   if (zs->separate_s8_bo) {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+   } else {
+      dw[2] = 0;
+   }
 }
 
 static inline void
-gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
-                                    const struct ilo_zs_surface *zs,
-                                    struct ilo_cp *cp)
+gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
+                               const struct ilo_zs_surface *zs)
 {
    const uint8_t cmd_len = 3;
-   uint32_t dw0;
+   uint32_t dw0, *dw;
+   unsigned pos;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
-   dw0 = (dev->gen >= ILO_GEN(7)) ?
+   dw0 = (builder->dev->gen >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
       GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
    dw0 |= (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
    /* see ilo_gpe_init_zs_surface() */
-   ilo_cp_write(cp, zs->payload[8]);
-   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, INTEL_RELOC_WRITE);
-   ilo_cp_end(cp);
+   dw[1] = zs->payload[8];
+
+   if (zs->hiz_bo) {
+      ilo_builder_batch_reloc(builder, pos + 2,
+            zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE);
+   } else {
+      dw[2] = 0;
+   }
 }
 
 static inline void
-gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
-                               uint32_t clear_val,
-                               struct ilo_cp *cp)
+gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder,
+                          uint32_t clear_val)
 {
    const uint8_t cmd_len = 2;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
                         GEN6_CLEAR_PARAMS_DW0_VALID |
                         (cmd_len - 2);
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, clear_val);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = clear_val;
 }
 
 static inline void
-gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
-                       uint32_t dw1,
-                       struct intel_bo *bo, uint32_t bo_offset,
-                       bool write_qword,
-                       struct ilo_cp *cp)
+gen6_PIPE_CONTROL(struct ilo_builder *builder,
+                  uint32_t dw1,
+                  struct intel_bo *bo, uint32_t bo_offset,
+                  bool write_qword)
 {
    const uint8_t cmd_len = (write_qword) ? 5 : 4;
    const uint32_t dw0 = GEN6_RENDER_CMD(3D, PIPE_CONTROL) | (cmd_len - 2);
    uint32_t reloc_flags = INTEL_RELOC_WRITE;
+   unsigned pos;
+   uint32_t *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 7.5);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 7.5);
 
    assert(bo_offset % ((write_qword) ? 8 : 4) == 0);
 
@@ -1840,7 +1808,7 @@ gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
                   GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
                   GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
 
-      if (dev->gen == ILO_GEN(6))
+      if (builder->dev->gen == ILO_GEN(6))
          bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;
 
       assert(dw1 & bit_test);
@@ -1868,27 +1836,30 @@ gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
     * The kernel will add the mapping automatically (when write domain is
     * INTEL_DOMAIN_INSTRUCTION).
     */
-   if (dev->gen == ILO_GEN(6) && bo) {
+   if (builder->dev->gen == ILO_GEN(6) && bo) {
       bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
       reloc_flags |= INTEL_RELOC_GGTT;
    }
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, dw1);
-   ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
-   ilo_cp_write(cp, 0);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = dw1;
+
+   if (bo)
+      ilo_builder_batch_reloc(builder, pos + 2, bo, bo_offset, reloc_flags);
+   else
+      dw[2] = 0;
+
+   dw[3] = 0;
    if (write_qword)
-      ilo_cp_write(cp, 0);
-   ilo_cp_end(cp);
+      dw[4] = 0;
 }
 
 static inline void
-gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
-                      const struct pipe_draw_info *info,
-                      const struct ilo_ib_state *ib,
-                      bool rectlist,
-                      struct ilo_cp *cp)
+gen6_3DPRIMITIVE(struct ilo_builder *builder,
+                 const struct pipe_draw_info *info,
+                 const struct ilo_ib_state *ib,
+                 bool rectlist)
 {
    const uint8_t cmd_len = 6;
    const int prim = (rectlist) ?
@@ -1897,23 +1868,22 @@ gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
       GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
    const uint32_t vb_start = info->start +
       ((info->indexed) ? ib->draw_start_offset : 0);
-   uint32_t dw0;
+   uint32_t dw0, *dw;
 
-   ILO_GPE_VALID_GEN(dev, 6, 6);
+   ILO_GPE_VALID_GEN(builder->dev, 6, 6);
 
    dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
          vb_access |
          prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
          (cmd_len - 2);
 
-   ilo_cp_begin(cp, cmd_len);
-   ilo_cp_write(cp, dw0);
-   ilo_cp_write(cp, info->count);
-   ilo_cp_write(cp, vb_start);
-   ilo_cp_write(cp, info->instance_count);
-   ilo_cp_write(cp, info->start_instance);
-   ilo_cp_write(cp, info->index_bias);
-   ilo_cp_end(cp);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   dw[0] = dw0;
+   dw[1] = info->count;
+   dw[2] = vb_start;
+   dw[3] = info->instance_count;
+   dw[4] = info->start_instance;
+   dw[5] = info->index_bias;
 }
 
 static inline uint32_t