From ee4484be3dc827cf15bcf109f5e680dbf1dfbf34 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 4 Mar 2014 16:30:28 -0800 Subject: [PATCH] i965: Set Broadwell MOCS values everywhere it's possible. This patch introduces two pre-canned MOCS values: BDW_MOCS_WB (write-back, all caches) and BDW_MOCS_WT (write-through, all caches). We use write-through caching for render targets, and write-back for all other data. (At least on Haswell, I believe write-back LLC/eLLC didn't work for scan-out buffers, while write-through did.) No performance analysis has been done on the impact of this patch. Signed-off-by: Kenneth Graunke Acked-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_defines.h | 8 ++++++++ src/mesa/drivers/dri/i965/gen8_depth_state.c | 7 ++++--- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 1 + src/mesa/drivers/dri/i965/gen8_misc_state.c | 15 +++++++++------ src/mesa/drivers/dri/i965/gen8_sol_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_surface_state.c | 5 +++-- 6 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8a4879ba447..c38e4478b0c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -521,6 +521,10 @@ #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) +/* Surface state DW1 */ +#define GEN8_SURFACE_MOCS_SHIFT 24 +#define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) + /* Surface state DW2 */ #define BRW_SURFACE_HEIGHT_SHIFT 19 #define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) @@ -2193,6 +2197,10 @@ enum brw_wm_barycentric_interp_mode { #define HSW_MOCS_WB_LLC_WB_ELLC (2 << 1) #define HSW_MOCS_UC_LLC_WB_ELLC (3 << 1) +/* Broadwell: write-back or write-through; always use all the caches. 
*/ +#define BDW_MOCS_WB 0x78 +#define BDW_MOCS_WT 0x58 + #include "intel_chipset.h" #endif diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 379a2c34905..621951e34fa 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -68,7 +68,7 @@ emit_depth_packets(struct brw_context *brw, OUT_BATCH(0); } OUT_BATCH(((width - 1) << 4) | ((height - 1) << 18) | lod); - OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10)); + OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10) | BDW_MOCS_WB); OUT_BATCH(0); OUT_BATCH(depth_mt ? depth_mt->qpitch >> 2 : 0); ADVANCE_BATCH(); @@ -84,7 +84,7 @@ emit_depth_packets(struct brw_context *brw, } else { BEGIN_BATCH(5); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); - OUT_BATCH(depth_mt->hiz_mt->region->pitch - 1); + OUT_BATCH((depth_mt->hiz_mt->region->pitch - 1) | BDW_MOCS_WB << 25); OUT_RELOC64(depth_mt->hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(depth_mt->hiz_mt->qpitch >> 2); @@ -116,7 +116,8 @@ emit_depth_packets(struct brw_context *brw, * page (which would imply that it does). Experiments with the hardware * indicate that it does. 
*/ - OUT_BATCH(HSW_STENCIL_ENABLED | (2 * stencil_mt->region->pitch - 1)); + OUT_BATCH(HSW_STENCIL_ENABLED | BDW_MOCS_WB << 22 | + (2 * stencil_mt->region->pitch - 1)); OUT_RELOC64(stencil_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, stencil_offset); diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index f927c135dd9..83bc2402a2a 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -105,6 +105,7 @@ gen8_emit_vertices(struct brw_context *brw) dw0 |= i << GEN6_VB0_INDEX_SHIFT; dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; dw0 |= buffer->stride << BRW_VB0_PITCH_SHIFT; + dw0 |= BDW_MOCS_WB << 16; OUT_BATCH(dw0); OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c index 72ac2b23588..464138886ef 100644 --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c @@ -37,18 +37,21 @@ static void upload_state_base_address(struct brw_context *brw) OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (16 - 2)); /* General state base address: stateless DP read/write requests */ OUT_BATCH(0); - OUT_BATCH(1); - OUT_BATCH(0); + OUT_BATCH(BDW_MOCS_WB << 2 | 1); + OUT_BATCH(BDW_MOCS_WB << 16); /* Surface state base address: */ - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, + BDW_MOCS_WB << 4 | 1); /* Dynamic state base address: */ OUT_RELOC64(brw->batch.bo, - I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, + BDW_MOCS_WB << 4 | 1); /* Indirect object base address: MEDIA_OBJECT data */ OUT_BATCH(0); - OUT_BATCH(1); + OUT_BATCH(BDW_MOCS_WB << 4 | 1); /* Instruction base address: shader kernels (incl. 
SIP) */ - OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + BDW_MOCS_WB << 4 | 1); /* General state buffer size */ OUT_BATCH(0xfffff001); diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index 35a77ac37cf..35aa76606a9 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -79,7 +79,8 @@ gen8_upload_3dstate_so_buffers(struct brw_context *brw) OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2)); OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) | GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE | - GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE); + GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE | + (BDW_MOCS_WB << 22)); OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start); OUT_BATCH(xfb_obj->Size[i] / 4 - 1); OUT_RELOC64(brw_obj->offset_bo, diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 27af6ad5ec5..90ec21a5998 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -101,6 +101,7 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | BRW_SURFACE_RC_READ_WRITE; + surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS); surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) | SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT); @@ -172,7 +173,7 @@ gen8_update_texture_surface(struct gl_context *ctx, if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D) surf[0] |= GEN8_SURFACE_IS_ARRAY; - surf[1] = mt->qpitch >> 2; + surf[1] = SET_FIELD(BDW_MOCS_WB, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); @@ -313,7 +314,7 @@ 
gen8_update_renderbuffer_surface(struct brw_context *brw, horizontal_alignment(mt) | surface_tiling_mode(region->tiling); - surf[1] = mt->qpitch >> 2; + surf[1] = SET_FIELD(BDW_MOCS_WT, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); -- 2.30.2