From: Kristian Høgsberg Date: Tue, 23 Sep 2014 16:46:28 +0000 (-0700) Subject: i965/skl: Use new MOCS for SKL X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c31ce2c40cef21be8a0de48bfdf0307e8d4cd424;p=mesa.git i965/skl: Use new MOCS for SKL On Skylake, the MOCS bits are an index into a table of 64 different, configurable cache configurations. As for previous GENs, we only care about WB and WT, which are available in the documented default set. Define SKL_MOCS_WB and SKL_MOCS_WT to the indices for those configurations and use those for the Skylake MOCS values. Signed-off-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke Reviewed-by: Anuj Phogat --- diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3725452bc98..64ff7445b7f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2407,4 +2407,11 @@ enum brw_wm_barycentric_interp_mode { #define BDW_MOCS_WT 0x58 #define BDW_MOCS_PTE 0x18 +/* Skylake: MOCS is now an index into an array of 64 different configurable + * cache settings. We still use only either write-back or write-through; and + * rely on the documented default values. + */ +#define SKL_MOCS_WB 9 +#define SKL_MOCS_WT 5 + #endif diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 7c3bfe03f15..a24032c343c 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -48,6 +48,8 @@ emit_depth_packets(struct brw_context *brw, uint32_t lod, uint32_t min_array_element) { + uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + /* Skip repeated NULL depth/stencil emits (think 2D rendering). 
*/ if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) { assert(brw->hw_ctx); @@ -73,7 +75,7 @@ emit_depth_packets(struct brw_context *brw, OUT_BATCH(0); } OUT_BATCH(((width - 1) << 4) | ((height - 1) << 18) | lod); - OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10) | BDW_MOCS_WB); + OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10) | mocs_wb); OUT_BATCH(0); OUT_BATCH(((depth - 1) << 21) | (depth_mt ? depth_mt->qpitch >> 2 : 0)); ADVANCE_BATCH(); @@ -89,7 +91,7 @@ emit_depth_packets(struct brw_context *brw, } else { BEGIN_BATCH(5); OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); - OUT_BATCH((depth_mt->hiz_mt->pitch - 1) | BDW_MOCS_WB << 25); + OUT_BATCH((depth_mt->hiz_mt->pitch - 1) | mocs_wb << 25); OUT_RELOC64(depth_mt->hiz_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(depth_mt->hiz_mt->qpitch >> 2); @@ -97,7 +99,7 @@ emit_depth_packets(struct brw_context *brw, } if (stencil_mt == NULL) { - BEGIN_BATCH(5); + BEGIN_BATCH(5); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); @@ -121,7 +123,7 @@ emit_depth_packets(struct brw_context *brw, * page (which would imply that it does). Experiments with the hardware * indicate that it does. */ - OUT_BATCH(HSW_STENCIL_ENABLED | BDW_MOCS_WB << 22 | + OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 | (2 * stencil_mt->pitch - 1)); OUT_RELOC64(stencil_mt->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index 8f0e5155556..3189a3e83af 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -39,6 +39,7 @@ static void gen8_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; brw_prepare_vertices(brw); brw_prepare_shader_draw_parameters(brw); @@ -119,7 +120,7 @@ gen8_emit_vertices(struct brw_context *brw) dw0 |= i << GEN6_VB0_INDEX_SHIFT; dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; dw0 |= buffer->stride << BRW_VB0_PITCH_SHIFT; - dw0 |= BDW_MOCS_WB << 16; + dw0 |= mocs_wb << 16; OUT_BATCH(dw0); OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); @@ -129,7 +130,7 @@ gen8_emit_vertices(struct brw_context *brw) if (brw->vs.prog_data->uses_vertexid) { OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT | GEN7_VB0_ADDRESS_MODIFYENABLE | - BDW_MOCS_WB << 16); + mocs_wb << 16); OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0, brw->draw.draw_params_offset); OUT_BATCH(brw->draw.draw_params_bo->size); @@ -242,13 +243,14 @@ static void gen8_emit_index_buffer(struct brw_context *brw) { const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; if (index_buffer == NULL) return; BEGIN_BATCH(5); OUT_BATCH(CMD_INDEX_BUFFER << 16 | (5 - 2)); - OUT_BATCH(brw_get_index_type(index_buffer->type) << 8 | BDW_MOCS_WB); + OUT_BATCH(brw_get_index_type(index_buffer->type) << 8 | mocs_wb); OUT_RELOC64(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, 0); OUT_BATCH(brw->ib.bo->size); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c index 16567c2c1e9..723d2271aa3 100644 --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c @@ -31,6 +31,8 @@ */ static void upload_state_base_address(struct brw_context *brw) { + uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + perf_debug("Missing MOCS setup for STATE_BASE_ADDRESS."); int pkt_len = brw->gen >= 9 ? 
19 : 16; @@ -38,22 +40,22 @@ static void upload_state_base_address(struct brw_context *brw) BEGIN_BATCH(pkt_len); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2)); /* General state base address: stateless DP read/write requests */ - OUT_BATCH(BDW_MOCS_WB << 4 | 1); + OUT_BATCH(mocs_wb << 4 | 1); OUT_BATCH(0); - OUT_BATCH(BDW_MOCS_WB << 16); + OUT_BATCH(mocs_wb << 16); /* Surface state base address: */ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - BDW_MOCS_WB << 4 | 1); + mocs_wb << 4 | 1); /* Dynamic state base address: */ OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, - BDW_MOCS_WB << 4 | 1); + mocs_wb << 4 | 1); /* Indirect object base address: MEDIA_OBJECT data */ - OUT_BATCH(BDW_MOCS_WB << 4 | 1); + OUT_BATCH(mocs_wb << 4 | 1); OUT_BATCH(0); /* Instruction base address: shader kernels (incl. SIP) */ OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - BDW_MOCS_WB << 4 | 1); + mocs_wb << 4 | 1); /* General state buffer size */ OUT_BATCH(0xfffff001); diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index ebcdaf8f9c3..555adcbb257 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -44,6 +44,7 @@ gen8_upload_3dstate_so_buffers(struct brw_context *brw) ctx->TransformFeedback.CurrentObject; struct brw_transform_feedback_object *brw_obj = (struct brw_transform_feedback_object *) xfb_obj; + uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; /* Set up the up to 4 output buffers. These are the ranges defined in the * gl_transform_feedback_object. 
@@ -80,7 +81,7 @@ gen8_upload_3dstate_so_buffers(struct brw_context *brw) OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) | GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE | GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE | - (BDW_MOCS_WB << 22)); + (mocs_wb << 22)); OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start); OUT_BATCH(xfb_obj->Size[i] / 4 - 1); OUT_RELOC64(brw_obj->offset_bo, diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index de0e9fe45d8..56c46b0469a 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -143,6 +143,7 @@ gen8_update_texture_surface(struct gl_context *ctx, struct intel_mipmap_tree *aux_mt = NULL; uint32_t aux_mode = 0; mesa_format format = intelObj->_Format; + uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; if (tObj->Target == GL_TEXTURE_BUFFER) { brw_update_buffer_texture_surface(ctx, unit, surf_offset); @@ -193,7 +194,7 @@ gen8_update_texture_surface(struct gl_context *ctx, if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D) surf[0] |= GEN8_SURFACE_IS_ARRAY; - surf[1] = SET_FIELD(BDW_MOCS_WB, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; + surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); @@ -328,9 +329,10 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1)); GLenum gl_target = rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D; - uint32_t surf_index = brw->wm.prog_data->binding_table.render_target_start + unit; + /* FINISHME: Use PTE MOCS on Skylake. */ + uint32_t mocs = brw->gen >= 9 ? 
SKL_MOCS_WT : BDW_MOCS_PTE; intel_miptree_used_for_rendering(mt); @@ -383,7 +385,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, horizontal_alignment(mt) | surface_tiling_mode(tiling); - surf[1] = SET_FIELD(BDW_MOCS_PTE, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; + surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);