unsigned cs_dwords_per_thread =
test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0;
- if (sctx->chip_class == SI) {
- /* SI doesn't support CP DMA operations through L2. */
+ if (test_sdma && !sctx->dma_cs)
+ continue;
+
+ if (sctx->chip_class == GFX6) {
+ /* GFX6 doesn't support CP DMA operations through L2. */
if (test_cp && cache_policy != L2_BYPASS)
continue;
- /* WAVES_PER_SH is in multiples of 16 on SI. */
+ /* WAVES_PER_SH is in multiples of 16 on GFX6. */
if (test_cs && cs_waves_per_sh % 16 != 0)
continue;
}
unsigned query_type = PIPE_QUERY_TIME_ELAPSED;
if (test_sdma) {
- if (sctx->chip_class == SI)
+ if (sctx->chip_class == GFX6)
query_type = SI_QUERY_TIME_ELAPSED_SDMA_SI;
else
query_type = SI_QUERY_TIME_ELAPSED_SDMA;
si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0,
SI_COHERENCY_NONE, cache_policy);
} else {
- si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value,
+ si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, size,
+ clear_value, 0,
SI_COHERENCY_NONE, cache_policy);
}
} else if (test_sdma) {
sctx->cs_user_data[i] = clear_value;
}
- sctx->flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_SMEM_L1;
+ sctx->flags |= SI_CONTEXT_INV_VCACHE |
+ SI_CONTEXT_INV_SCACHE;
- ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, is_copy ? 2 : 1, sb);
+ ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0,
+ is_copy ? 2 : 1, sb, 0x1);
ctx->bind_compute_state(ctx, cs);
sctx->cs_max_waves_per_sh = cs_waves_per_sh;
/* Flush L2, so that we don't just test L2 cache performance. */
if (!test_sdma) {
- sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+ sctx->flags |= SI_CONTEXT_WB_L2;
si_emit_cache_flush(sctx);
}
if (!r->is_valid)
continue;
- /* Ban CP DMA clears via MC on <= VI. They are super slow
+ /* Ban CP DMA clears via MC on <= GFX8. They are super slow
* on GTT, which we can get due to BO evictions.
*/
- if (sctx->chip_class <= VI && placement == 1 &&
+ if (sctx->chip_class <= GFX8 && placement == 1 &&
r->is_cp && r->cache_policy == L2_BYPASS)
continue;