radeon_emit(cs, 0x80000000);
radeon_emit(cs, 0x80000000);
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_SHADER_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLUSH_WITH_INV_L2 |
- R600_CONTEXT_FLAG_COMPUTE;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLUSH_WITH_INV_L2 |
+ SI_CONTEXT_FLAG_COMPUTE;
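+ /* SI_CONTEXT_FLUSH_WITH_INV_L2 is the compute-only variant: in
+  * si_emit_cache_flush it becomes a CACHE_FLUSH event with
+  * EVENT_WRITE_INV_L2 (see further below). */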
si_emit_cache_flush(&sctx->b, NULL);
pm4->compute_pkt = true;
si_pm4_free_state(sctx, pm4, ~0);
- sctx->b.flags |= R600_CONTEXT_CS_PARTIAL_FLUSH |
- R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_SHADER_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLAG_COMPUTE;
+ sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
+ SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLAG_COMPUTE;
si_emit_cache_flush(&sctx->b, NULL);
}
desc->atom.dirty = true;
/* The descriptors are read with the K cache. */
- sctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
+ sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
} else {
desc->atom.dirty = false;
}
}
}
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2;
si_update_descriptors(sctx, &samplers->views.desc);
}
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh buffer, so I don't think flushing the const
* cache is needed. */
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2;
}
/* Flush the caches where the resource is bound. */
/* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB_META;
- sctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META;
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
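+ /* The PS partial flush above makes sure any in-flight pixel shader work
+  * that may still touch this memory finishes before the CP DMA operation
+  * overwrites it. */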
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
/* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB_META;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
void si_copy_buffer(struct si_context *sctx,
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB_META |
- R600_CONTEXT_PS_PARTIAL_FLUSH;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META |
+ SI_CONTEXT_PS_PARTIAL_FLUSH;
while (size) {
unsigned sync_flags = 0;
dst_offset += byte_count;
}
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB_META;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
/* INIT/DEINIT */
r600_preflush_suspend_features(&ctx->b);
- ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_DB_META |
- R600_CONTEXT_INV_TEX_CACHE |
+ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META |
+ SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
/* this is probably not needed anymore */
- R600_CONTEXT_PS_PARTIAL_FLUSH;
+ SI_CONTEXT_PS_PARTIAL_FLUSH;
si_emit_cache_flush(&ctx->b, NULL);
/* force tiling flags to be kept */
void si_begin_new_cs(struct si_context *ctx)
{
/* Flush read caches at the beginning of CS. */
- ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_INV_SHADER_CACHE;
+ ctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_INV_ICACHE;
/* set all valid groups as dirty so they get re-emitted on
* next draw command
#define SI_MAX_DRAW_CS_DWORDS \
(/*derived prim state:*/ 6 + /*draw regs:*/ 16 + /*draw packets:*/ 31)
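/* i.e. 6 + 16 + 31 = 53 dwords in the worst case */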
-/* read caches */
-#define R600_CONTEXT_INV_TEX_CACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
-#define R600_CONTEXT_INV_CONST_CACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
-#define R600_CONTEXT_INV_SHADER_CACHE (R600_CONTEXT_PRIVATE_FLAG << 2)
-/* read-write caches */
-#define R600_CONTEXT_FLUSH_AND_INV (R600_CONTEXT_PRIVATE_FLAG << 3)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
-#define R600_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
-#define R600_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
-#define R600_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7)
-#define R600_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 8)
-/* engine synchronization */
-#define R600_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
-#define R600_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
-#define R600_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11)
-#define R600_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 12)
-/* other flags */
-#define R600_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13)
+/* Instruction cache. */
+#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
+/* Cache used by scalar memory (SMEM) instructions. They also use TC
+ * as a second level cache, which isn't flushed by this.
+ * Other names: constant cache, data cache, DCACHE */
+#define SI_CONTEXT_INV_KCACHE (R600_CONTEXT_PRIVATE_FLAG << 1)
+/* Caches used by vector memory (VMEM) instructions.
+ * L1 can optionally be bypassed (GLC=1) and can only be used by shaders.
+ * L2 is used by shaders and can be used by other blocks (CP, sDMA). */
+#define SI_CONTEXT_INV_TC_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
+#define SI_CONTEXT_INV_TC_L2 (R600_CONTEXT_PRIVATE_FLAG << 3)
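+/* Note: in GCN ISA terms the L1 bypass is the "glc" modifier on VMEM
+ * instructions; a load with glc=1 goes straight to L2. */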
+/* Framebuffer caches. */
+#define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
+#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
+#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
+#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7)
+/* Engine synchronization. */
+#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8)
+#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
+#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
+#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 11)
+/* Compute only. */
+#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */
+#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13)
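+/* Typical usage, as an illustrative sketch of the call sites in this patch
+ * (not new code): OR the needed flags into the context and either let the
+ * next draw emit the flush or emit it explicitly:
+ *
+ *   sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ *                    SI_CONTEXT_INV_TC_L2;
+ *   si_emit_cache_flush(&sctx->b, NULL);
+ */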
struct si_compute;
int i;
if (sctx->framebuffer.state.nr_cbufs) {
- sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_CB_META;
}
if (sctx->framebuffer.state.zsbuf) {
- sctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_DB_META;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_FLUSH_AND_INV_DB_META;
}
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
{
struct si_context *sctx = (struct si_context *)ctx;
- sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_FLUSH_AND_INV_CB;
+ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_FLUSH_AND_INV_CB;
}
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
uint32_t cp_coher_cntl = 0;
uint32_t compute =
- PKT3_SHADER_TYPE_S(!!(sctx->flags & R600_CONTEXT_FLAG_COMPUTE));
+ PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
/* XXX SI flushes both ICACHE and KCACHE if either flag is set.
* XXX CIK shouldn't have this issue. Test CIK before separating the flags
* XXX to ensure there is no regression. Also find out if there is another
* XXX way to flush either ICACHE or KCACHE but not both for SI. */
- if (sctx->flags & (R600_CONTEXT_INV_SHADER_CACHE |
- R600_CONTEXT_INV_CONST_CACHE)) {
+ if (sctx->flags & (SI_CONTEXT_INV_ICACHE |
+ SI_CONTEXT_INV_KCACHE)) {
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) |
S_0085F0_SH_KCACHE_ACTION_ENA(1);
}
- if (sctx->flags & (R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_STREAMOUT_FLUSH)) {
- cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
- S_0085F0_TCL1_ACTION_ENA(1);
- }
- if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB) {
+
+ if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH))
+ cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+ if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH))
+ cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+
+ if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
}
- if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB) {
+ if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_DB_DEST_BASE_ENA(1);
}
}
}
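/* The accumulated cp_coher_cntl value is consumed elsewhere in this
 * function (elided from this hunk), presumably via a SURFACE_SYNC packet
 * along these lines:
 *
 *   radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
 *   radeon_emit(cs, cp_coher_cntl);  // CP_COHER_CNTL
 *   radeon_emit(cs, 0xffffffff);     // CP_COHER_SIZE
 *   radeon_emit(cs, 0);              // CP_COHER_BASE
 *   radeon_emit(cs, 0x0000000A);     // POLL_INTERVAL
 */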
- if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META) {
+ if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
- if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META) {
+ if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
- if (sctx->flags & R600_CONTEXT_FLUSH_WITH_INV_L2) {
+ if (sctx->flags & SI_CONTEXT_FLUSH_WITH_INV_L2) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | EVENT_INDEX(7) |
EVENT_WRITE_INV_L2);
}
- if (sctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) {
+ if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
} else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
- if (sctx->flags & R600_CONTEXT_CS_PARTIAL_FLUSH) {
+ if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
- if (sctx->flags & R600_CONTEXT_VGT_FLUSH) {
+ if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
- if (sctx->flags & R600_CONTEXT_VGT_STREAMOUT_SYNC) {
+ if (sctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
}
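/* After all packets are emitted, si_emit_cache_flush is expected to reset
 * sctx->flags to 0 (not shown in this hunk) so the same flushes are not
 * re-emitted on the next call. */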
if (sctx->b.family == CHIP_HAWAII &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
- sctx->b.flags |= R600_CONTEXT_VGT_STREAMOUT_SYNC;
+ sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
/* Set the depth buffer as dirty. */
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
- sctx->b.flags |= R600_CONTEXT_VGT_FLUSH;
+ sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,