We can't use the more fine-grained load and store fence commands (lfence
and sfence), since clflush is only guaranteed to be ordered with respect
to mfence.
if (!primary->device->info.has_llc) {
void *inst = secondary->batch.next - inst_size;
void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
if (!primary->device->info.has_llc) {
void *inst = secondary->batch.next - inst_size;
void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
while (p < secondary->batch.next) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;
while (p < secondary->batch.next) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;
anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
if (!cmd_buffer->device->info.has_llc) {
anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
if (!cmd_buffer->device->info.has_llc) {
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
__builtin_ia32_clflush((*bbo)->bo.map + i);
anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
__builtin_ia32_clflush((*bbo)->bo.map + i);
return VK_SUCCESS;
/* Make sure the writes we're flushing have landed. */
return VK_SUCCESS;
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
/* Make sure no reads get moved up above the invalidate. */
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
/* Make sure no reads get moved up above the invalidate. */
- __builtin_ia32_lfence();
+ __builtin_ia32_mfence();
if (!device->info.has_llc) {
assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
if (!device->info.has_llc) {
assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(fence->bo.map);
}
__builtin_ia32_clflush(fence->bo.map);
}
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
__builtin_ia32_clflush(event);
}
ANV_FROM_HANDLE(anv_event, event, _event);
if (!device->info.has_llc) {
ANV_FROM_HANDLE(anv_event, event, _event);
if (!device->info.has_llc) {
- /* Make sure the writes we're flushing have landed. */
+ /* Invalidate read cache before reading event written by GPU. */
__builtin_ia32_clflush(event);
__builtin_ia32_clflush(event);
- __builtin_ia32_lfence();
+ __builtin_ia32_mfence();
+
}
return event->semaphore;
}
return event->semaphore;
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
__builtin_ia32_clflush(event);
}
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
if (!device->info.has_llc) {
/* Make sure the writes we're flushing have landed. */
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
__builtin_ia32_clflush(event);
}
__builtin_ia32_clflush(event);
}
void *end = state.map + state.alloc_size;
void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK);
void *end = state.map + state.alloc_size;
void *p = (void *) (((uintptr_t) state.map) & ~CACHELINE_MASK);
- __builtin_ia32_sfence();
+ __builtin_ia32_mfence();
while (p < end) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;
while (p < end) {
__builtin_ia32_clflush(p);
p += CACHELINE_SIZE;