X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fsi_cmd_buffer.c;h=f5f4eefcd28373ae23adcb4b4caf52a996f6e362;hb=b462ceb482471dd4aacdd5fa7cef9fe25f823d70;hp=20144d39ea33976e7ad3e933fcf4da300c627892;hpb=9f7edf4d1f4e2d6acf5ed520ce85815fb1c5d884;p=mesa.git diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 20144d39ea3..f5f4eefcd28 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -147,9 +147,10 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ if (physical_device->rad_info.chip_class < CIK) - radeon_set_config_reg(cs, GRBM_GFX_INDEX, - SE_INDEX(se) | SH_BROADCAST_WRITES | - INSTANCE_BROADCAST_WRITES); + radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, + S_00802C_SE_INDEX(se) | + S_00802C_SH_BROADCAST_WRITES(1) | + S_00802C_INSTANCE_BROADCAST_WRITES(1)); else radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | @@ -161,9 +162,10 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ if (physical_device->rad_info.chip_class < CIK) - radeon_set_config_reg(cs, GRBM_GFX_INDEX, - SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | - INSTANCE_BROADCAST_WRITES); + radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, + S_00802C_SE_BROADCAST_WRITES(1) | + S_00802C_SH_BROADCAST_WRITES(1) | + S_00802C_INSTANCE_BROADCAST_WRITES(1)); else radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | @@ -516,12 +518,6 @@ si_emit_config(struct radv_physical_device *physical_device, assert(0); } - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, - S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF)); - /* TODO: Enable the binner: */ - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, - S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | - S_028C44_DISABLE_START_OF_PRIM(1)); radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) | S_028C48_MAX_PRIM_PER_BATCH(1023)); @@ -571,7 +567,9 @@ cik_create_gfx_config(struct radv_device *device) device->gfx_init = device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS); + RADEON_FLAG_CPU_ACCESS| + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_READ_ONLY); if (!device->gfx_init) goto fail; @@ -676,7 +674,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first, int i; float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY; const float max_range = 32767.0f; - assert(count); + if (!count) + return; radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2); for (i = 0; i < count; i++) { @@ -988,6 +987,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, if (chip_class >= GFX9 && flush_cb_db) { unsigned cb_db_event, tc_flags; +#if 0 + /* This breaks a bunch of: + dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*. + use the big hammer always. + */ /* Set the CB/DB flush event. */ switch (flush_cb_db) { case RADV_CMD_FLAG_FLUSH_AND_INV_CB: @@ -1000,25 +1004,30 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, /* both CB & DB */ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; } - - /* TC | TC_WB = invalidate L2 data - * TC_MD | TC_WB = invalidate L2 metadata - * TC | TC_WB | TC_MD = invalidate L2 data & metadata +#else + cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; +#endif + /* These are the only allowed combinations. If you need to + * do multiple operations at once, do them separately. + * All operations that invalidate L2 also seem to invalidate + * metadata. Volatile (VOL) and WC flushes are not listed here. * - * The metadata cache must always be invalidated for coherency - * between CB/DB and shaders. (metadata = HTILE, CMASK, DCC) - * - * TC must be invalidated on GFX9 only if the CB/DB surface is - * not pipe-aligned. If the surface is RB-aligned, it might not - * strictly be pipe-aligned since RB alignment takes precendence. + * TC | TC_WB = writeback & invalidate L2 & L1 + * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC + * TC_WB | TC_NC = writeback L2 for MTYPE == NC + * TC | TC_NC = invalidate L2 for MTYPE == NC + * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.) + * TCL1 = invalidate L1 */ - tc_flags = EVENT_TC_WB_ACTION_ENA | - EVENT_TC_MD_ACTION_ENA; + tc_flags = EVENT_TC_ACTION_ENA | + EVENT_TC_MD_ACTION_ENA; /* Ideally flush TC together with CB/DB. */ if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { - tc_flags |= EVENT_TC_ACTION_ENA | - EVENT_TCL1_ACTION_ENA; + /* Writeback and invalidate everything in L2 & L1. */ + tc_flags = EVENT_TC_ACTION_ENA | + EVENT_TC_WB_ACTION_ENA; + /* Clear the flags. */ flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 | @@ -1125,7 +1134,9 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.flush_bits); - radv_cmd_buffer_trace_emit(cmd_buffer); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_cmd_buffer_trace_emit(cmd_buffer); + cmd_buffer->state.flush_bits = 0; } @@ -1249,7 +1260,8 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, 0); } - radv_cmd_buffer_trace_emit(cmd_buffer); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_cmd_buffer_trace_emit(cmd_buffer); } void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,