ilo: enable L3 cache in MOCS
author: Chia-I Wu <olvaffe@gmail.com>
Thu, 5 Mar 2015 20:30:07 +0000 (04:30 +0800)
committer: Chia-I Wu <olvaffe@gmail.com>
Thu, 5 Mar 2015 20:50:19 +0000 (04:50 +0800)
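This enables L3 cache in MOCS almost everywhere: a per-builder default MOCS value is chosen by hardware generation (write-back with L3 on Gen8, L3 write-back on Gen7/Gen7.5, zero otherwise) and is applied to the depth, stencil, and HiZ buffers; vertex, index, and stream-output buffers; push constants; surface states; and STATE_BASE_ADDRESS.  On Gen8, scanout surfaces override the default and use the PTE memory type with L3 instead.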

src/gallium/drivers/ilo/ilo_builder.c
src/gallium/drivers/ilo/ilo_builder.h
src/gallium/drivers/ilo/ilo_builder_3d_bottom.h
src/gallium/drivers/ilo/ilo_builder_3d_top.h
src/gallium/drivers/ilo/ilo_builder_render.h

index 56920e5bfb63cedb4489cb422b643d6ceb716605..d068e5bafad2473628da71f98a5fc3fdecb0730b 100644 (file)
@@ -338,6 +338,20 @@ ilo_builder_init(struct ilo_builder *builder,
    builder->dev = dev;
    builder->winsys = winsys;
 
+   /* gen6_SURFACE_STATE() may override this */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+      builder->mocs = GEN8_MOCS_MT_WB | GEN8_MOCS_CT_L3;
+      break;
+   case ILO_GEN(7.5):
+   case ILO_GEN(7):
+      builder->mocs = GEN7_MOCS_L3_WB;
+      break;
+   default:
+      builder->mocs = 0;
+      break;
+   }
+
    for (i = 0; i < ILO_BUILDER_WRITER_COUNT; i++)
       ilo_builder_writer_init(builder, i);
 }
index cb639d1bf12fe46df1f8627cf864138dac6ff235..e0e9f5359280dcc484aab8a2f6046a3f6bb6b9ea 100644 (file)
@@ -100,6 +100,7 @@ struct ilo_builder_snapshot {
 struct ilo_builder {
    const struct ilo_dev_info *dev;
    struct intel_winsys *winsys;
+   uint32_t mocs;
 
    struct ilo_builder_writer writers[ILO_BUILDER_WRITER_COUNT];
    bool unrecoverable_error;
index ece1423190589928a18c5bfdffbcff53877ff01d..4f203d1e9f5071373d0016ecb74f7ad424b77b2e 100644 (file)
@@ -1182,6 +1182,8 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
       dw[6] = zs->payload[4];
       dw[7] = zs->payload[5];
 
+      dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
+
       if (zs->bo) {
          ilo_builder_batch_reloc64(builder, pos + 2, zs->bo,
                zs->payload[1], INTEL_RELOC_WRITE);
@@ -1192,6 +1194,11 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
       dw[5] = zs->payload[4];
       dw[6] = zs->payload[5];
 
+      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
+         dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT;
+      else
+         dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
+
       if (zs->bo) {
          ilo_builder_batch_reloc(builder, pos + 2, zs->bo,
                zs->payload[1], INTEL_RELOC_WRITE);
@@ -1220,6 +1227,8 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
    dw[2] = 0;
 
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
+
       dw[3] = 0;
       dw[4] = zs->payload[8];
 
@@ -1228,6 +1237,8 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
                zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
       }
    } else {
+      dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
+
       if (zs->separate_s8_bo) {
          ilo_builder_batch_reloc(builder, pos + 2,
                zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
@@ -1256,6 +1267,8 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
    dw[2] = 0;
 
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
+
       dw[3] = 0;
       dw[4] = zs->payload[11];
 
@@ -1264,6 +1277,8 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
                zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
       }
    } else {
+      dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
+
       if (zs->hiz_bo) {
          ilo_builder_batch_reloc(builder, pos + 2,
                zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
index ab1374a4d2bbe6924cf31aabf5ce35d0803a8cc5..b968beb35c5e15b01667ca16bccf6fa7cb8fe70b 100644 (file)
@@ -479,6 +479,11 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
 
       dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT;
 
+      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
+         dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT;
+      else
+         dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT;
+
       if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
          dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED;
 
@@ -650,9 +655,9 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
 
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
-           format |
-           (cmd_len - 2);
+   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
+           builder->mocs << GEN6_IB_DW0_MOCS__SHIFT |
+           format;
    if (enable_cut_index)
       dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
 
@@ -694,7 +699,8 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2);
-   dw[1] = format;
+   dw[1] = format |
+           builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
    dw[4] = buf->bo_size;
 
    /* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */
@@ -1193,6 +1199,8 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride,
            stride;
 
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
+
       dw[4] = end - start;
       dw[5] = 0;
       dw[6] = 0;
@@ -1201,6 +1209,8 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride,
       ilo_builder_batch_reloc64(builder, pos + 2,
             buf->bo, start, INTEL_RELOC_WRITE);
    } else {
+      dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT;
+
       ilo_builder_batch_reloc(builder, pos + 2,
             buf->bo, start, INTEL_RELOC_WRITE);
       ilo_builder_batch_reloc(builder, pos + 3,
@@ -1441,7 +1451,9 @@ gen6_3dstate_constant(struct ilo_builder *builder, int subop,
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = cmd | (cmd_len - 2) |
-           buf_enabled << 12;
+           buf_enabled << GEN6_CONSTANT_DW0_BUFFER_ENABLES__SHIFT |
+           builder->mocs << GEN6_CONSTANT_DW0_MOCS__SHIFT;
+
    memcpy(&dw[1], buf_dw, sizeof(buf_dw));
 }
 
@@ -1543,6 +1555,8 @@ gen7_3dstate_constant(struct ilo_builder *builder,
       dw[9] = payload[5];
       dw[10] = 0;
    } else {
+      payload[2] |= builder->mocs << GEN7_CONSTANT_DW_ADDR_MOCS__SHIFT;
+
       memcpy(&dw[1], payload, sizeof(payload));
    }
 }
@@ -1630,6 +1644,11 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
       memcpy(dw, surf->payload, state_len << 2);
 
       if (surf->bo) {
+         const uint32_t mocs = (surf->scanout) ?
+            (GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;
+
+         dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
+
          ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
                surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0);
       }
@@ -1642,6 +1661,12 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
       memcpy(dw, surf->payload, state_len << 2);
 
       if (surf->bo) {
+         /*
+          * For scanouts, we should not enable caching in LLC.  Since we only
+          * enable that on Gen8+, we are fine here.
+          */
+         dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
+
          ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
                surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
       }
index 00080402216ff9434c7245a95e1d7086e212d3b7..34a2e2c248619d405afdd531b6645d3022f0509c 100644 (file)
@@ -201,10 +201,13 @@ ilo_builder_batch_patch_sba(struct ilo_builder *builder)
 
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
       ilo_builder_batch_reloc64(builder, builder->sba_instruction_pos,
-            inst->bo, 1, 0);
+            inst->bo,
+            builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+            0);
    } else {
-      ilo_builder_batch_reloc(builder, builder->sba_instruction_pos,
-            inst->bo, 1, 0);
+      ilo_builder_batch_reloc(builder, builder->sba_instruction_pos, inst->bo,
+            builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+            0);
    }
 }
 
@@ -227,12 +230,18 @@ gen6_state_base_address(struct ilo_builder *builder, bool init_all)
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2);
-   dw[1] = init_all;
+   dw[1] = builder->mocs << GEN6_SBA_MOCS__SHIFT |
+           builder->mocs << GEN6_SBA_DW1_GENERAL_STATELESS_MOCS__SHIFT |
+           init_all;
 
-   ilo_builder_batch_reloc(builder, pos + 2, bat->bo, 1, 0);
-   ilo_builder_batch_reloc(builder, pos + 3, bat->bo, 1, 0);
+   ilo_builder_batch_reloc(builder, pos + 2, bat->bo,
+         builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+         0);
+   ilo_builder_batch_reloc(builder, pos + 3, bat->bo,
+         builder->mocs << GEN6_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+         0);
 
-   dw[4] = init_all;
+   dw[4] = builder->mocs << GEN6_SBA_MOCS__SHIFT | init_all;
 
    /*
     * Since the instruction writer has WRITER_FLAG_APPEND set, it is tempting
@@ -268,12 +277,16 @@ gen8_state_base_address(struct ilo_builder *builder, bool init_all)
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) | (cmd_len - 2);
-   dw[1] = init_all;
+   dw[1] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all;
    dw[2] = 0;
-   dw[3] = 0;
-   ilo_builder_batch_reloc64(builder, pos + 4, bat->bo, 1, 0);
-   ilo_builder_batch_reloc64(builder, pos + 6, bat->bo, 1, 0);
-   dw[8] = init_all;
+   dw[3] = builder->mocs << GEN8_SBA_DW3_STATELESS_MOCS__SHIFT;
+   ilo_builder_batch_reloc64(builder, pos + 4, bat->bo,
+         builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+         0);
+   ilo_builder_batch_reloc64(builder, pos + 6, bat->bo,
+         builder->mocs << GEN8_SBA_MOCS__SHIFT | GEN6_SBA_ADDR_MODIFIED,
+         0);
+   dw[8] = builder->mocs << GEN8_SBA_MOCS__SHIFT | init_all;
    dw[9] = 0;
 
    ilo_builder_batch_patch_sba(builder);