struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
assert(primary->batch.start == this_bbo->bo.map);
uint32_t offset = primary->batch.next - primary->batch.start;
+ const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
/* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
 * can emit a new command and relocation for the current splice.
 */
last_bbo->relocs.num_relocs--;
- secondary->batch.next -= GEN8_MI_BATCH_BUFFER_START_length * 4;
+ secondary->batch.next -= inst_size;
emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
+
+ /* After patching up the secondary buffer, we need to clflush the
+ * modified instruction in case we're on a !llc platform. We use a
+ * little loop to handle the case where the instruction crosses a cache
+ * line boundary.
+ */
+ if (!primary->device->info.has_llc) {
+ void *inst = secondary->batch.next - inst_size;
+ void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
+ __builtin_ia32_sfence();
+ while (p < secondary->batch.next) {
+ __builtin_ia32_clflush(p);
+ p += CACHELINE_SIZE;
+ }
+ }
+
break;
}
case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
*/
assert(relocs->relocs[i].offset < pool->state.end);
uint32_t *reloc_data = pool->map + relocs->relocs[i].offset;
+
+ /* We're reading back the relocated value from potentially incoherent
+ * memory here. However, any change to the value will be from the kernel
+ * writing out relocations, which will keep the CPU cache up to date.
+ */
relocs->relocs[i].presumed_offset = *reloc_data - relocs->relocs[i].delta;
/* All of the relocations from this block pool to other BO's should
anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
+ if (!cmd_buffer->device->info.has_llc) {
+ __builtin_ia32_sfence();
+ anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
+ for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
+ __builtin_ia32_clflush((*bbo)->bo.map + i);
+ }
+ }
+
cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
.buffer_count = cmd_buffer->execbuf2.bo_count,
}
if (layout == NULL)
- return VK_SUCCESS;
+ goto out;
for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) {
struct anv_pipeline_binding *binding =
surface_state.map,
stage, desc->type,
bo_offset, desc->range);
+
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(surface_state);
+
break;
}
add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
}
+ out:
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(*bt_state);
+
return VK_SUCCESS;
}
sampler->state, sizeof(sampler->state));
}
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(*state);
+
return VK_SUCCESS;
}
struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
- uint32_t *a, uint32_t dwords, uint32_t alignment)
+ const void *data, uint32_t size, uint32_t alignment)
{
struct anv_state state;
+ /* Allocate dynamic state and copy the caller's data into it. */
- state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
- dwords * 4, alignment);
- memcpy(state.map, a, dwords * 4);
+ state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
+ memcpy(state.map, data, size);
+
+ /* On !llc platforms the CPU cache is not coherent with the GPU, so
+ * flush the freshly written state out to memory.
+ */
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
return state;
}
for (uint32_t i = 0; i < dwords; i++)
p[i] = a[i] | b[i];
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
+
VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
return state;
u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
}
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
+
return state;
}
{
}
+/* Allocate @size bytes (aligned to @align) from @pool, copy @p into the
+ * new state, and clflush it on !llc platforms so the GPU will see the
+ * data.
+ */
+static struct anv_state
+anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
+{
+ struct anv_state state;
+
+ state = anv_state_pool_alloc(pool, size, align);
+ memcpy(state.map, p, size);
+
+ if (!pool->block_pool->device->info.has_llc)
+ anv_state_clflush(state);
+
+ return state;
+}
+
static void
anv_device_init_border_colors(struct anv_device *device)
{
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
};
- device->border_colors =
- anv_state_pool_alloc(&device->dynamic_state_pool,
- sizeof(border_colors), 32);
- memcpy(device->border_colors.map, border_colors, sizeof(border_colors));
+ device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
+ sizeof(border_colors), 32, border_colors);
}
VkResult anv_CreateDevice(
anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
anv_batch_emit(&batch, GEN7_MI_NOOP);
+ if (!device->info.has_llc)
+ anv_state_clflush(state);
+
exec2_objects[0].handle = bo->gem_handle;
exec2_objects[0].relocation_count = 0;
exec2_objects[0].relocs_ptr = 0;
anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
anv_batch_emit(&batch, GEN7_MI_NOOP);
+ if (!device->info.has_llc) {
+ assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
+ assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
+ __builtin_ia32_sfence();
+ __builtin_ia32_clflush(fence->bo.map);
+ }
+
fence->exec2_objects[0].handle = fence->bo.gem_handle;
fence->exec2_objects[0].relocation_count = 0;
fence->exec2_objects[0].relocs_ptr = 0;
},
};
+ anv_state_clflush(vb_state);
+
struct anv_buffer vertex_buffer = {
.device = device,
.size = vb_size,
};
struct anv_state state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16);
- memcpy(state.map, vertex_data, sizeof(vertex_data));
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
struct anv_buffer vertex_buffer = {
.device = device,
};
struct anv_state state =
- anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, sizeof(vertex_data), 16);
- memcpy(state.map, vertex_data, sizeof(vertex_data));
+ anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16);
struct anv_buffer vertex_buffer = {
.device = device,
memcpy(state.map, data, size);
+ if (!pipeline->device->info.has_llc)
+ anv_state_clflush(state);
+
return state.offset;
}
+
static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
gl_shader_stage stage,
#define CACHELINE_SIZE 64
#define CACHELINE_MASK 63
+/* Flush the CPU cache lines covering @state back to memory.  Used on
+ * platforms without a coherent LLC before handing state to the GPU.
+ * The sfence orders prior stores ahead of the flushes.
+ */
+static void inline
+anv_state_clflush(struct anv_state state)
+{
+ /* state.map may not be cacheline aligned, so round down the start pointer
+ * to a cacheline boundary so we flush all cache lines that contain the
+ * state.
+ */
+ void *end = state.map + state.alloc_size;
+ void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK);
+
+ __builtin_ia32_sfence();
+ while (p < end) {
+ __builtin_ia32_clflush(p);
+ p += CACHELINE_SIZE;
+ }
+}
+
void anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device, uint32_t block_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
} while (0)
+/* Allocate state from @pool, pack one @cmd structure built from the
+ * designated initializers in __VA_ARGS__ into it, and clflush it on
+ * !llc platforms.  Evaluates (statement expression) to the resulting
+ * struct anv_state.
+ */
+#define anv_state_pool_emit(pool, cmd, align, ...) ({ \
+ const uint32_t __size = __anv_cmd_length(cmd) * 4; \
+ struct anv_state __state = \
+ anv_state_pool_alloc((pool), __size, align); \
+ struct cmd __template = { \
+ __VA_ARGS__ \
+ }; \
+ __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \
+ if (!(pool)->block_pool->device->info.has_llc) \
+ anv_state_clflush(__state); \
+ __state; \
+ })
+
#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \
.GraphicsDataTypeGFDT = 0, \
.LLCCacheabilityControlLLCCC = 0, \
void gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer);
struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
- uint32_t *a, uint32_t dwords,
- uint32_t alignment);
+ const void *data, uint32_t size, uint32_t alignment);
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
uint32_t *a, uint32_t *b,
uint32_t dwords, uint32_t alignment);
anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
.ScissorRectPointer = scissor_state.offset);
+
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(scissor_state);
}
GENX_FUNC(GEN7, GEN7) void
if (result != VK_SUCCESS)
return result;
- struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = {
- .KernelStartPointer = pipeline->cs_simd,
- .BindingTablePointer = surfaces.offset,
- .SamplerStatePointer = samplers.offset,
- .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
- };
-
- uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
struct anv_state state =
- anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
- GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
-
+ anv_state_pool_emit(&device->dynamic_state_pool,
+ GEN7_INTERFACE_DESCRIPTOR_DATA, 64,
+ .KernelStartPointer = pipeline->cs_simd,
+ .BindingTablePointer = surfaces.offset,
+ .SamplerStatePointer = samplers.offset,
+ .NumberofThreadsinGPGPUThreadGroup = 0);
+
+ const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
.InterfaceDescriptorTotalLength = size,
.InterfaceDescriptorDataStartAddress = state.offset);
cmd_buffer->state.dynamic.stencil_reference.back,
};
GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(cc_state);
anv_batch_emit(&cmd_buffer->batch,
GEN7_3DSTATE_CC_STATE_POINTERS,
{
struct anv_device *device = pipeline->device;
- uint32_t num_dwords = GEN7_BLEND_STATE_length;
- pipeline->blend_state =
- anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
-
if (info->pAttachments == NULL) {
- struct GEN7_BLEND_STATE blend_state = {
- .ColorBufferBlendEnable = false,
- .WriteDisableAlpha = false,
- .WriteDisableRed = false,
- .WriteDisableGreen = false,
- .WriteDisableBlue = false,
- };
-
- GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state);
+ pipeline->blend_state =
+ anv_state_pool_emit(&device->dynamic_state_pool,
+ GEN7_BLEND_STATE, 64,
+ .ColorBufferBlendEnable = false,
+ .WriteDisableAlpha = false,
+ .WriteDisableRed = false,
+ .WriteDisableGreen = false,
+ .WriteDisableBlue = false);
} else {
/* FIXME-GEN7: All render targets share blend state settings on gen7, we
* can't implement this.
*/
const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
+ pipeline->blend_state =
+ anv_state_pool_emit(&device->dynamic_state_pool,
+ GEN7_BLEND_STATE, 64,
- struct GEN7_BLEND_STATE blend_state = {
- .ColorBufferBlendEnable = a->blendEnable,
- .IndependentAlphaBlendEnable = true, /* FIXME: yes? */
- .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+ .ColorBufferBlendEnable = a->blendEnable,
+ .IndependentAlphaBlendEnable = true, /* FIXME: yes? */
+ .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
- .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
- .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+ .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+ .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
- .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
- .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
- .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
- .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+ .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+ .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+ .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+ .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
# if 0
- bool AlphaToOneEnable;
- bool AlphaToCoverageDitherEnable;
+ bool AlphaToOneEnable;
+ bool AlphaToCoverageDitherEnable;
# endif
- .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
- .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
- .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
- .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+ .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+ .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+ .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+ .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
- .LogicOpEnable = info->logicOpEnable,
- .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+ .LogicOpEnable = info->logicOpEnable,
+ .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
# if 0
- bool AlphaTestEnable;
- uint32_t AlphaTestFunction;
- bool ColorDitherEnable;
- uint32_t XDitherOffset;
- uint32_t YDitherOffset;
- uint32_t ColorClampRange;
- bool PreBlendColorClampEnable;
- bool PostBlendColorClampEnable;
+ bool AlphaTestEnable;
+ uint32_t AlphaTestFunction;
+ bool ColorDitherEnable;
+ uint32_t XDitherOffset;
+ uint32_t YDitherOffset;
+ uint32_t ColorClampRange;
+ bool PreBlendColorClampEnable;
+ bool PostBlendColorClampEnable;
# endif
- };
-
- GEN7_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state);
+ );
}
anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS,
GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map,
&surface_state);
+
+ if (!device->info.has_llc)
+ anv_state_clflush(iview->nonrt_surface_state);
}
if (image->needs_color_rt_surface_state) {
GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map,
&surface_state);
+ if (!device->info.has_llc)
+ anv_state_clflush(iview->color_rt_surface_state);
}
}
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport);
}
+ if (!cmd_buffer->device->info.has_llc) {
+ anv_state_clflush(sf_clip_state);
+ anv_state_clflush(cc_state);
+ }
+
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
.CCViewportPointer = cc_state.offset);
};
GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(cc_state);
+
anv_batch_emit(&cmd_buffer->batch,
GEN8_3DSTATE_CC_STATE_POINTERS,
.ColorCalcStatePointer = cc_state.offset,
};
GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(cc_state);
+
anv_batch_emit(&cmd_buffer->batch,
GEN9_3DSTATE_CC_STATE_POINTERS,
.ColorCalcStatePointer = cc_state.offset,
if (result != VK_SUCCESS)
return result;
- struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
- .KernelStartPointer = pipeline->cs_simd,
- .KernelStartPointerHigh = 0,
- .BindingTablePointer = surfaces.offset,
- .BindingTableEntryCount = 0,
- .SamplerStatePointer = samplers.offset,
- .SamplerCount = 0,
- .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
- };
-
- uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
struct anv_state state =
- anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
- GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, state.map, &desc);
+ anv_state_pool_emit(&device->dynamic_state_pool,
+ GENX(INTERFACE_DESCRIPTOR_DATA), 64,
+ .KernelStartPointer = pipeline->cs_simd,
+ .KernelStartPointerHigh = 0,
+ .BindingTablePointer = surfaces.offset,
+ .BindingTableEntryCount = 0,
+ .SamplerStatePointer = samplers.offset,
+ .SamplerCount = 0,
+ .NumberofThreadsinGPGPUThreadGroup = 0);
+ uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
.InterfaceDescriptorTotalLength = size,
.InterfaceDescriptorDataStartAddress = state.offset);
}
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+ if (!device->info.has_llc)
+ anv_state_clflush(pipeline->blend_state);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
.BlendStatePointer = pipeline->blend_state.offset,
GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->nonrt_surface_state.map,
&surface_state);
+ if (!device->info.has_llc)
+ anv_state_clflush(iview->nonrt_surface_state);
}
if (image->needs_color_rt_surface_state) {
GENX(RENDER_SURFACE_STATE_pack)(NULL, iview->color_rt_surface_state.map,
&surface_state);
+ if (!device->info.has_llc)
+ anv_state_clflush(iview->color_rt_surface_state);
}
}