radv/gfx9: add support for 3d images to blit 2d paths
[mesa.git] / src / amd / vulkan / si_cmd_buffer.c
index 1e8b43d4fa894d436eca8947f5ad4c2730e955ff..972d37948aae89da5d73d0ae047155e17ba76dd6 100644 (file)
@@ -147,9 +147,10 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
 
                /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
                if (physical_device->rad_info.chip_class < CIK)
-                       radeon_set_config_reg(cs, GRBM_GFX_INDEX,
-                                             SE_INDEX(se) | SH_BROADCAST_WRITES |
-                                             INSTANCE_BROADCAST_WRITES);
+                       radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+                                             S_00802C_SE_INDEX(se) |
+                                             S_00802C_SH_BROADCAST_WRITES(1) |
+                                             S_00802C_INSTANCE_BROADCAST_WRITES(1));
                else
                        radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                                               S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
@@ -161,9 +162,10 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
 
        /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
        if (physical_device->rad_info.chip_class < CIK)
-               radeon_set_config_reg(cs, GRBM_GFX_INDEX,
-                                     SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
-                                     INSTANCE_BROADCAST_WRITES);
+               radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+                                     S_00802C_SE_BROADCAST_WRITES(1) |
+                                     S_00802C_SH_BROADCAST_WRITES(1) |
+                                     S_00802C_INSTANCE_BROADCAST_WRITES(1));
        else
                radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                                       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
@@ -179,7 +181,8 @@ si_emit_compute(struct radv_physical_device *physical_device,
        radeon_emit(cs, 0);
        radeon_emit(cs, 0);
 
-       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+                             S_00B854_WAVES_PER_SH(0x3));
        radeon_emit(cs, 0);
        /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
@@ -341,7 +344,7 @@ si_emit_config(struct radv_physical_device *physical_device,
        radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
 
        if (physical_device->has_clear_state) {
-               radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 1, 0));
+               radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
                radeon_emit(cs, 0);
        }
 
@@ -432,11 +435,15 @@ si_emit_config(struct radv_physical_device *physical_device,
 
        if (physical_device->rad_info.chip_class >= CIK) {
                if (physical_device->rad_info.chip_class >= GFX9) {
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
                } else {
-                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+                                         S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+                                         S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
                        /* If this is 0, Bonaire can hang even if GS isn't being used.
                         * Other chips are unaffected. These are suboptimal values,
                         * but we don't use on-chip GS.
@@ -445,7 +452,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                                               S_028A44_ES_VERTS_PER_SUBGRP(64) |
                                               S_028A44_GS_PRIMS_PER_SUBGRP(4));
                }
-               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                                 S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
 
                if (physical_device->rad_info.num_good_compute_units /
                    (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -455,7 +463,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                         *
                         * LATE_ALLOC_VS = 2 is the highest safe number.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xffff) | S_00B118_WAVE_LIMIT(0x3F) );
                        radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
                } else {
                        /* Set LATE_ALLOC_VS == 31. It should be less than
@@ -463,11 +472,13 @@ si_emit_config(struct radv_physical_device *physical_device,
                         * - VS can't execute on CU0.
                         * - If HS writes outputs to LDS, LS can't execute on CU0.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xfffe) | S_00B118_WAVE_LIMIT(0x3F));
                        radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
                }
 
-               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+                                 S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
        }
 
        if (physical_device->rad_info.chip_class >= VI) {
@@ -528,6 +539,11 @@ si_emit_config(struct radv_physical_device *physical_device,
        radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
                    S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2)));
 
+       if (!physical_device->has_clear_state) {
+               radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL,
+                                      S_028004_ZPASS_INCREMENT_DISABLE(1));
+       }
+
        si_emit_compute(physical_device, cs);
 }
 
@@ -557,7 +573,8 @@ cik_create_gfx_config(struct radv_device *device)
        device->gfx_init = device->ws->buffer_create(device->ws,
                                                     cs->cdw * 4, 4096,
                                                     RADEON_DOMAIN_GTT,
-                                                    RADEON_FLAG_CPU_ACCESS);
+                                                    RADEON_FLAG_CPU_ACCESS|
+                                                    RADEON_FLAG_NO_INTERPROCESS_SHARING);
        if (!device->gfx_init)
                goto fail;
 
@@ -958,14 +975,12 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
        }
 
-       if (!flush_cb_db) {
-               if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
-                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
-                       radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-               } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
-                       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
-                       radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-               }
+       if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+       } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+               radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
        }
 
        if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
@@ -1113,7 +1128,9 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
                               cmd_buffer->state.flush_bits);
 
 
-       radv_cmd_buffer_trace_emit(cmd_buffer);
+       if (unlikely(cmd_buffer->device->trace_bo))
+               radv_cmd_buffer_trace_emit(cmd_buffer);
+
        cmd_buffer->state.flush_bits = 0;
 }
 
@@ -1237,7 +1254,8 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
                radeon_emit(cs, 0);
        }
 
-       radv_cmd_buffer_trace_emit(cmd_buffer);
+       if (unlikely(cmd_buffer->device->trace_bo))
+               radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,