vk: clflush all state for non-LLC GPUs
author Kristian Høgsberg <krh@sweater.jf.intel.com>
Tue, 1 Dec 2015 23:37:12 +0000 (15:37 -0800)
committer Kristian Høgsberg Kristensen <kristian.h.kristensen@intel.com>
Fri, 4 Dec 2015 17:51:47 +0000 (09:51 -0800)
13 files changed:
src/vulkan/anv_batch_chain.c
src/vulkan/anv_cmd_buffer.c
src/vulkan/anv_device.c
src/vulkan/anv_meta.c
src/vulkan/anv_meta_clear.c
src/vulkan/anv_pipeline.c
src/vulkan/anv_private.h
src/vulkan/gen7_cmd_buffer.c
src/vulkan/gen7_pipeline.c
src/vulkan/gen7_state.c
src/vulkan/gen8_cmd_buffer.c
src/vulkan/gen8_pipeline.c
src/vulkan/gen8_state.c

index 6a5faea5777ee417476976dd25595380401ed45a..41bae98139703ca86b40003a2c56cbeac2cba8a0 100644 (file)
@@ -724,6 +724,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
       struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
       assert(primary->batch.start == this_bbo->bo.map);
       uint32_t offset = primary->batch.next - primary->batch.start;
+      const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
 
       /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
        * can emit a new command and relocation for the current splice.  In
@@ -732,9 +733,25 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
        * here.
        */
       last_bbo->relocs.num_relocs--;
-      secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4;
+      secondary->batch.next -= inst_size;
       emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
       anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
+
+      /* After patching up the secondary buffer, we need to clflush the
+       * modified instruction in case we're on a !llc platform. We use a
+       * little loop to handle the case where the instruction crosses a cache
+       * line boundary.
+       */
+      if (!primary->device->info.has_llc) {
+         void *inst = secondary->batch.next - inst_size;
+         void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
+         __builtin_ia32_sfence();
+         while (p < secondary->batch.next) {
+            __builtin_ia32_clflush(p);
+            p += CACHELINE_SIZE;
+         }
+      }
+
       break;
    }
    case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
@@ -886,6 +903,11 @@ adjust_relocations_from_block_pool(struct anv_block_pool *pool,
        */
       assert(relocs->relocs[i].offset < pool->state.end);
       uint32_t *reloc_data = pool->map + relocs->relocs[i].offset;
+
+      /* We're reading back the relocated value from potentially incoherent
+       * memory here. However, any change to the value will be from the kernel
+       * writing out relocations, which will keep the CPU cache up to date.
+       */
       relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta;
 
       /* All of the relocations from this block pool to other BO's should
@@ -994,6 +1016,14 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
 
    anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
 
+   if (!cmd_buffer->device->info.has_llc) {
+      __builtin_ia32_sfence();
+      anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
+         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
+            __builtin_ia32_clflush((*bbo)->bo.map + i);
+      }
+   }
+
    cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
       .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
       .buffer_count = cmd_buffer->execbuf2.bo_count,
index 19d4be902749af29ad5c3ff20263df94a13ced73..ee437aa6330e15060b90b3a485c20a87f2315080 100644 (file)
@@ -666,7 +666,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    }
 
    if (layout == NULL)
-      return VK_SUCCESS;
+      goto out;
 
    for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) {
       struct anv_pipeline_binding *binding =
@@ -698,6 +698,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                               surface_state.map,
                                               stage, desc->type,
                                               bo_offset, desc->range);
+
+         if (!cmd_buffer->device->info.has_llc)
+            anv_state_clflush(surface_state);
+
          break;
       }
 
@@ -724,6 +728,10 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
    }
 
+ out:
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*bt_state);
+
    return VK_SUCCESS;
 }
 
@@ -772,20 +780,25 @@ anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
              sampler->state, sizeof(sampler->state));
    }
 
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*state);
+
    return VK_SUCCESS;
 }
 
 struct anv_state
 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
-                             uint32_t *a, uint32_t dwords, uint32_t alignment)
+                            const void *data, uint32_t size, uint32_t alignment)
 {
    struct anv_state state;
 
-   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                              dwords * 4, alignment);
-   memcpy(state.map, a, dwords * 4);
+   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
+   memcpy(state.map, data, size);
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
 
-   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
 
    return state;
 }
@@ -804,6 +817,9 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
    for (uint32_t i = 0; i < dwords; i++)
       p[i] = a[i] | b[i];
 
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
    VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
 
    return state;
@@ -881,6 +897,9 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
       u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
    }
 
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
    return state;
 }
 
index 724c4120a06ada79c609098541c17bb1efb0da88..384a457742f81ba9e90a1185a49302b34b36445e 100644 (file)
@@ -599,6 +599,20 @@ anv_queue_finish(struct anv_queue *queue)
 {
 }
 
+static struct anv_state
+anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
+{
+   struct anv_state state;
+
+   state = anv_state_pool_alloc(pool, size, align);
+   memcpy(state.map, p, size);
+
+   if (!pool->block_pool->device->info.has_llc)
+      anv_state_clflush(state);
+
+   return state;
+}
+
 static void
 anv_device_init_border_colors(struct anv_device *device)
 {
@@ -611,10 +625,8 @@ anv_device_init_border_colors(struct anv_device *device)
       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
    };
 
-   device->border_colors =
-      anv_state_pool_alloc(&device->dynamic_state_pool,
-                           sizeof(border_colors), 32);
-   memcpy(device->border_colors.map, border_colors, sizeof(border_colors));
+   device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
+                                                    sizeof(border_colors), 32, border_colors);
 }
 
 VkResult anv_CreateDevice(
@@ -885,6 +897,9 @@ VkResult anv_DeviceWaitIdle(
    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
    anv_batch_emit(&batch, GEN7_MI_NOOP);
 
+   if (!device->info.has_llc)
+      anv_state_clflush(state);
+
    exec2_objects[0].handle = bo->gem_handle;
    exec2_objects[0].relocation_count = 0;
    exec2_objects[0].relocs_ptr = 0;
@@ -1219,6 +1234,13 @@ VkResult anv_CreateFence(
    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
    anv_batch_emit(&batch, GEN7_MI_NOOP);
 
+   if (!device->info.has_llc) {
+      assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
+      assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
+      __builtin_ia32_sfence();
+      __builtin_ia32_clflush(fence->bo.map);
+   }
+
    fence->exec2_objects[0].handle = fence->bo.gem_handle;
    fence->exec2_objects[0].relocation_count = 0;
    fence->exec2_objects[0].relocs_ptr = 0;
index bed01980a0d0079b24e4687368d038ca0c95f396..008c8904d646ad0278dd93b20bd7c3f14d10c98d 100644 (file)
@@ -478,6 +478,8 @@ meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
       },
    };
 
+   anv_state_clflush(vb_state);
+
    struct anv_buffer vertex_buffer = {
       .device = device,
       .size = vb_size,
index 24ff1ea75e49c9be417d9da47d29648492e08c7b..cb1a84ed5334aeaa4e1c76829e5477bf70433ba9 100644 (file)
@@ -316,8 +316,7 @@ emit_load_color_clear(struct anv_cmd_buffer *cmd_buffer,
    };
 
    struct anv_state state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16);
-   memcpy(state.map, vertex_data, sizeof(vertex_data));
+      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
 
    struct anv_buffer vertex_buffer = {
       .device = device,
@@ -485,8 +484,7 @@ emit_load_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer,
    };
 
    struct anv_state state =
-      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16);
-   memcpy(state.map, vertex_data, sizeof(vertex_data));
+      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
 
    struct anv_buffer vertex_buffer = {
       .device = device,
index 367d5180bd3df8c6689834060bfe3a66ffc23d4c..948e675d0915e780051326d8a2652d8b4f1474f9 100644 (file)
@@ -373,8 +373,12 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
 
    memcpy(state.map, data, size);
 
+   if (!pipeline->device->info.has_llc)
+      anv_state_clflush(state);
+
    return state.offset;
 }
+
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
index 8be64d8887dd6f971710b53163720ccfbd521a9f..3c0441b66f38333e3e6fa14b358fa5e02d438ae5 100644 (file)
@@ -394,6 +394,22 @@ struct anv_state_stream {
 #define CACHELINE_SIZE 64
 #define CACHELINE_MASK 63
 
+static void inline
+anv_state_clflush(struct anv_state state)
+{
+   /* state.map may not be cacheline aligned, so round down the start pointer
+    * to a cacheline boundary so we flush all cachelines that contain the state.
+    */
+   void *end = state.map + state.alloc_size;
+   void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK);
+
+   __builtin_ia32_sfence();
+   while (p < end) {
+      __builtin_ia32_clflush(p);
+      p += CACHELINE_SIZE;
+   }
+}
+
 void anv_block_pool_init(struct anv_block_pool *pool,
                          struct anv_device *device, uint32_t block_size);
 void anv_block_pool_finish(struct anv_block_pool *pool);
@@ -721,6 +737,20 @@ __gen_combine_address(struct anv_batch *batch, void *location,
       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
    } while (0)
 
+#define anv_state_pool_emit(pool, cmd, align, ...) ({                   \
+      const uint32_t __size = __anv_cmd_length(cmd) * 4;                \
+      struct anv_state __state =                                        \
+         anv_state_pool_alloc((pool), __size, align);                   \
+      struct cmd __template = {                                         \
+         __VA_ARGS__                                                    \
+      };                                                                \
+      __anv_cmd_pack(cmd)(NULL, __state.map, &__template);              \
+      VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \
+      if (!(pool)->block_pool->device->info.has_llc)                    \
+         anv_state_clflush(__state);                                    \
+      __state;                                                          \
+   })
+
 #define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) {  \
    .GraphicsDataTypeGFDT                        = 0,           \
    .LLCCacheabilityControlLLCCC                 = 0,           \
@@ -1104,8 +1134,7 @@ VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
 void gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer);
 
 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
-                                             uint32_t *a, uint32_t dwords,
-                                             uint32_t alignment);
+                                             const void *data, uint32_t size, uint32_t alignment);
 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                                               uint32_t *a, uint32_t *b,
                                               uint32_t dwords, uint32_t alignment);
index 7101831080bfe303c950a9345b44094800642e53..5efa7cce00228c4043fa78f8cc019421de2a1593 100644 (file)
@@ -201,6 +201,9 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer,
 
    anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
                   .ScissorRectPointer = scissor_state.offset);
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(scissor_state);
 }
 
 GENX_FUNC(GEN7, GEN7) void
@@ -266,19 +269,15 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    if (result != VK_SUCCESS)
       return result;
 
-   struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = {
-      .KernelStartPointer = pipeline->cs_simd,
-      .BindingTablePointer = surfaces.offset,
-      .SamplerStatePointer = samplers.offset,
-      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
-   };
-
-   uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
    struct anv_state state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
-   GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
-
+      anv_state_pool_emit(&device->dynamic_state_pool,
+                          GEN7_INTERFACE_DESCRIPTOR_DATA, 64,
+                          .KernelStartPointer = pipeline->cs_simd,
+                          .BindingTablePointer = surfaces.offset,
+                          .SamplerStatePointer = samplers.offset,
+                          .NumberofThreadsinGPGPUThreadGroup = 0);
+
+   const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
    anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                   .InterfaceDescriptorTotalLength = size,
                   .InterfaceDescriptorDataStartAddress = state.offset);
@@ -441,6 +440,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
             cmd_buffer->state.dynamic.stencil_reference.back,
       };
       GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(cc_state);
 
       anv_batch_emit(&cmd_buffer->batch,
                      GEN7_3DSTATE_CC_STATE_POINTERS,
index 400b9ae997decc13809a58a26fc7e3d6394a7430..8262956ef072ad6f2963aa3f25d366b3602b4855 100644 (file)
@@ -253,65 +253,60 @@ gen7_emit_cb_state(struct anv_pipeline *pipeline,
 {
    struct anv_device *device = pipeline->device;
 
-   uint32_t num_dwords = GEN7_BLEND_STATE_length;
-   pipeline->blend_state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
-
    if (info->pAttachments == NULL) {
-      struct GEN7_BLEND_STATE blend_state = {
-         .ColorBufferBlendEnable = false,
-         .WriteDisableAlpha = false,
-         .WriteDisableRed = false,
-         .WriteDisableGreen = false,
-         .WriteDisableBlue = false,
-      };
-
-      GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state);
+      pipeline->blend_state =
+         anv_state_pool_emit(&device->dynamic_state_pool,
+            GEN7_BLEND_STATE, 64,
+            .ColorBufferBlendEnable = false,
+            .WriteDisableAlpha = false,
+            .WriteDisableRed = false,
+            .WriteDisableGreen = false,
+            .WriteDisableBlue = false);
    } else {
       /* FIXME-GEN7: All render targets share blend state settings on gen7, we
        * can't implement this.
        */
       const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
+      pipeline->blend_state =
+         anv_state_pool_emit(&device->dynamic_state_pool,
+            GEN7_BLEND_STATE, 64,
 
-      struct GEN7_BLEND_STATE blend_state = {
-         .ColorBufferBlendEnable = a->blendEnable,
-         .IndependentAlphaBlendEnable = true, /* FIXME: yes? */
-         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+            .ColorBufferBlendEnable = a->blendEnable,
+            .IndependentAlphaBlendEnable = true, /* FIXME: yes? */
+            .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
 
-         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
-         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+            .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+            .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
 
-         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
-         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
-         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
-         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+            .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+            .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+            .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+            .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
 
 #     if 0
-         bool                                         AlphaToOneEnable;
-         bool                                         AlphaToCoverageDitherEnable;
+            bool                                AlphaToOneEnable;
+            bool                                AlphaToCoverageDitherEnable;
 #     endif
 
-         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
-         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
-         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
-         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+            .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+            .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+            .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+            .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
 
-         .LogicOpEnable = info->logicOpEnable,
-         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+            .LogicOpEnable = info->logicOpEnable,
+            .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
 
 #     if 0
-         bool                                         AlphaTestEnable;
-         uint32_t                                     AlphaTestFunction;
-         bool                                         ColorDitherEnable;
-         uint32_t                                     XDitherOffset;
-         uint32_t                                     YDitherOffset;
-         uint32_t                                     ColorClampRange;
-         bool                                         PreBlendColorClampEnable;
-         bool                                         PostBlendColorClampEnable;
+            bool                                AlphaTestEnable;
+            uint32_t                            AlphaTestFunction;
+            bool                                ColorDitherEnable;
+            uint32_t                            XDitherOffset;
+            uint32_t                            YDitherOffset;
+            uint32_t                            ColorClampRange;
+            bool                                PreBlendColorClampEnable;
+            bool                                PostBlendColorClampEnable;
 #     endif
-      };
-
-      GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state);
+            );
     }
 
    anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS,
index 6dcb5bffdf1be1240642f1622149576b58ceb958..6ffbacd8e775911876cd3c2b81a5ee67308dd61a 100644 (file)
@@ -308,6 +308,9 @@ genX(image_view_init)(struct anv_image_view *iview,
 
       GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map,
                                       &surface_state);
+
+      if (!device->info.has_llc)
+         anv_state_clflush(iview->nonrt_surface_state);
    }
 
    if (image->needs_color_rt_surface_state) {
@@ -326,5 +329,7 @@ genX(image_view_init)(struct anv_image_view *iview,
 
       GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map,
                                       &surface_state);
+      if (!device->info.has_llc)
+         anv_state_clflush(iview->color_rt_surface_state);
    }
 }
index a23421102aa4fc160c052a6d31c8352dd14f1470..68ee6bb5696b5a12b636c663e1294efe1c3fbf74 100644 (file)
@@ -107,6 +107,11 @@ emit_viewport_state(struct anv_cmd_buffer *cmd_buffer,
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport);
    }
 
+   if (!cmd_buffer->device->info.has_llc) {
+      anv_state_clflush(sf_clip_state);
+      anv_state_clflush(cc_state);
+   }
+
    anv_batch_emit(&cmd_buffer->batch,
                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
                   .CCViewportPointer = cc_state.offset);
@@ -270,6 +275,9 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
       };
       GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
 
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(cc_state);
+
       anv_batch_emit(&cmd_buffer->batch,
                      GEN8_3DSTATE_CC_STATE_POINTERS,
                      .ColorCalcStatePointer = cc_state.offset,
@@ -317,6 +325,9 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
       };
       GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
 
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(cc_state);
+
       anv_batch_emit(&cmd_buffer->batch,
                      GEN9_3DSTATE_CC_STATE_POINTERS,
                      .ColorCalcStatePointer = cc_state.offset,
@@ -500,22 +511,18 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    if (result != VK_SUCCESS)
       return result;
 
-   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
-      .KernelStartPointer = pipeline->cs_simd,
-      .KernelStartPointerHigh = 0,
-      .BindingTablePointer = surfaces.offset,
-      .BindingTableEntryCount = 0,
-      .SamplerStatePointer = samplers.offset,
-      .SamplerCount = 0,
-      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
-   };
-
-   uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
    struct anv_state state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
-   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, state.map, &desc);
+      anv_state_pool_emit(&device->dynamic_state_pool,
+                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
+                          .KernelStartPointer = pipeline->cs_simd,
+                          .KernelStartPointerHigh = 0,
+                          .BindingTablePointer = surfaces.offset,
+                          .BindingTableEntryCount = 0,
+                          .SamplerStatePointer = samplers.offset,
+                          .SamplerCount = 0,
+                          .NumberofThreadsinGPGPUThreadGroup = 0);
 
+   uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
    anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
                   .InterfaceDescriptorTotalLength = size,
                   .InterfaceDescriptorDataStartAddress = state.offset);
index d0395741474415bb135205727c6671bab2f3ee49..faf997a4304483d197d55d4fa5f3206a1535e931 100644 (file)
@@ -259,6 +259,8 @@ emit_cb_state(struct anv_pipeline *pipeline,
    }
 
    GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+   if (!device->info.has_llc)
+      anv_state_clflush(pipeline->blend_state);
 
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
                   .BlendStatePointer = pipeline->blend_state.offset,
index e7acbd022818b45eccf625aba446b4df6c89f659..901cc3b25a8076d6ff76788fb1d68b27d0a20fd9 100644 (file)
@@ -280,6 +280,8 @@ genX(image_view_init)(struct anv_image_view *iview,
 
       GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map,
                                       &surface_state);
+      if (!device->info.has_llc)
+         anv_state_clflush(iview->nonrt_surface_state);
    }
 
    if (image->needs_color_rt_surface_state) {
@@ -297,6 +299,8 @@ genX(image_view_init)(struct anv_image_view *iview,
 
       GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map,
                                       &surface_state);
+      if (!device->info.has_llc)
+         anv_state_clflush(iview->color_rt_surface_state);
    }
 }