i965/blorp: Reduce the size of vertex buffer
[mesa.git] / src / mesa / drivers / dri / i965 / gen6_blorp.cpp
index b6a3d78d8496d18afb86beb78e62e86debb9932b..cf30f71ea515304d4d00e90788650ce7ac50dabc 100644 (file)
 #include <assert.h>
 
 #include "intel_batchbuffer.h"
-#include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 
 #include "brw_context.h"
-#include "brw_defines.h"
 #include "brw_state.h"
 
 #include "brw_blorp.h"
-#include "gen6_blorp.h"
-
-/**
- * \name Constants for BLORP VBO
- * \{
- */
-#define GEN6_BLORP_NUM_VERTICES 3
-#define GEN6_BLORP_NUM_VUE_ELEMS 8
-#define GEN6_BLORP_VBO_SIZE (GEN6_BLORP_NUM_VERTICES \
-                             * GEN6_BLORP_NUM_VUE_ELEMS \
-                             * sizeof(float))
-/** \} */
 
 /**
  * CMD_STATE_BASE_ADDRESS
@@ -109,19 +95,33 @@ gen6_blorp_emit_vertex_buffer_state(struct brw_context *brw,
    if (brw->gen >= 7)
       dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
-   if (brw->gen == 7)
+   switch (brw->gen) {
+   case 7:
       dw0 |= GEN7_MOCS_L3 << 16;
+      break;
+   case 8:
+      dw0 |= BDW_MOCS_WB << 16;
+      break;
+   case 9:
+      dw0 |= SKL_MOCS_WB << 16;
+      break;
+   }
 
    BEGIN_BATCH(batch_length);
    OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
    OUT_BATCH(dw0);
-   /* start address */
-   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-             vertex_offset);
-   /* end address */
-   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-             vertex_offset + vbo_size - 1);
-   OUT_BATCH(0);
+   if (brw->gen >= 8) {
+      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, vertex_offset);
+      OUT_BATCH(vbo_size);
+   } else {
+      /* start address */
+      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset);
+      /* end address */
+      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset + vbo_size - 1);
+      OUT_BATCH(0);
+   }
    ADVANCE_BATCH();
 }
 
@@ -159,25 +159,32 @@ gen6_blorp_emit_vertices(struct brw_context *brw,
     *
     * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
     * "Vertex URB Entry (VUE) Formats".
+    *
+    * Only the vertex position X and Y components vary; Z is fixed to zero
+    * and W to one, and header dwords dw0-3 are all zero. There is no need
+    * to store these fixed values in the vertex buffer: the vertex fetcher
+    * can be instructed to fill vertex elements with the constants zero and
+    * one instead of reading them from the buffer. See the vertex element
+    * setup below.
     */
    {
       float *vertex_data;
 
-      const float vertices[GEN6_BLORP_VBO_SIZE] = {
-         /* v0 */ 0, 0, 0, 0,     (float) params->x0, (float) params->y1, 0, 1,
-         /* v1 */ 0, 0, 0, 0,     (float) params->x1, (float) params->y1, 0, 1,
-         /* v2 */ 0, 0, 0, 0,     (float) params->x0, (float) params->y0, 0, 1,
+      const float vertices[] = {
+         /* v0 */ (float)params->x0, (float)params->y1,
+         /* v1 */ (float)params->x1, (float)params->y1,
+         /* v2 */ (float)params->x0, (float)params->y0,
       };
 
       vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
-                                              GEN6_BLORP_VBO_SIZE, 32,
+                                              sizeof(vertices), 32,
                                               &vertex_offset);
-      memcpy(vertex_data, vertices, GEN6_BLORP_VBO_SIZE);
-   }
+      memcpy(vertex_data, vertices, sizeof(vertices));
 
-   gen6_blorp_emit_vertex_buffer_state(brw, GEN6_BLORP_NUM_VUE_ELEMS,
-                                       GEN6_BLORP_VBO_SIZE,
-                                       vertex_offset);
+      const unsigned blorp_num_vue_elems = 2;
+      gen6_blorp_emit_vertex_buffer_state(brw, blorp_num_vue_elems,
+                                          sizeof(vertices), vertex_offset);
+   }
 
    /* 3DSTATE_VERTEX_ELEMENTS
     *
@@ -194,18 +201,18 @@ gen6_blorp_emit_vertices(struct brw_context *brw,
       OUT_BATCH(GEN6_VE0_VALID |
                 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
                 0 << BRW_VE0_SRC_OFFSET_SHIFT);
-      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
       /* Element 1 */
       OUT_BATCH(GEN6_VE0_VALID |
-                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
-                16 << BRW_VE0_SRC_OFFSET_SHIFT);
+                BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
       OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
                 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
       ADVANCE_BATCH();
    }
 }
@@ -260,6 +267,11 @@ gen6_blorp_emit_blend_state(struct brw_context *brw,
       blend[i].blend1.pre_blend_clamp_enable = 1;
       blend[i].blend1.post_blend_clamp_enable = 1;
       blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
+
+      blend[i].blend1.write_disable_r = params->color_write_disable[0];
+      blend[i].blend1.write_disable_g = params->color_write_disable[1];
+      blend[i].blend1.write_disable_b = params->color_write_disable[2];
+      blend[i].blend1.write_disable_a = params->color_write_disable[3];
    }
 
    return cc_blend_state_offset;
@@ -413,7 +425,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw,
    assert(tile_y % 2 == 0);
    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
-              (surface->mt->align_h == 4 ?
+              (surface->mt->valign == 4 ?
                BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 
    /* Emit relocation to surface contents */
@@ -486,7 +498,6 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw,
                           0, /* min LOD */
                           max_lod,
                           0, /* LOD bias */
-                          0, /* base miplevel */
                           0, /* shadow function */
                           non_normalized_coords,
                           0); /* border color offset - unused */
@@ -688,13 +699,16 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
    dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
    dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
    if (params->use_wm_prog) {
-      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
       dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
       dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
-      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
       dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
    }
 
+   if (params->src.mt) {
+      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
+      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
+   }
+
    if (params->dst.num_samples > 1) {
       dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
       if (prog_data && prog_data->persample_msaa_dispatch)
@@ -821,7 +835,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
 
    /* 3DSTATE_DEPTH_BUFFER */
    {
-      intel_emit_depth_stall_flushes(brw);
+      brw_emit_depth_stall_flushes(brw);
 
       BEGIN_BATCH(7);
       /* 3DSTATE_DEPTH_BUFFER dw0 */
@@ -896,7 +910,7 @@ static void
 gen6_blorp_emit_depth_disable(struct brw_context *brw,
                               const brw_blorp_params *params)
 {
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    BEGIN_BATCH(7);
    OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -1021,7 +1035,7 @@ gen6_blorp_exec(struct brw_context *brw,
    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
 
    /* Emit workaround flushes when we switch from drawing to blorping. */
-   intel_emit_post_sync_nonzero_flush(brw);
+   brw_emit_post_sync_nonzero_flush(brw);
 
    gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
    gen6_emit_3dstate_sample_mask(brw,
@@ -1040,7 +1054,6 @@ gen6_blorp_exec(struct brw_context *brw,
    if (params->use_wm_prog) {
       uint32_t wm_surf_offset_renderbuffer;
       uint32_t wm_surf_offset_texture = 0;
-      uint32_t sampler_offset;
       wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
       intel_miptree_used_for_rendering(params->dst.mt);
       wm_surf_offset_renderbuffer =
@@ -1056,7 +1069,10 @@ gen6_blorp_exec(struct brw_context *brw,
          gen6_blorp_emit_binding_table(brw,
                                        wm_surf_offset_renderbuffer,
                                        wm_surf_offset_texture);
-      sampler_offset =
+   }
+
+   if (params->src.mt) {
+      const uint32_t sampler_offset =
          gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
       gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset);
    }