radv/gfx10: emit VGT_VERTEX_REUSE_BLOCK_CNTL during gfx initialization
[mesa.git] / src / amd / vulkan / si_cmd_buffer.c
index 126cabd390ae281d6bf46cc89a874597272f27d7..91a1db4fc840cfd10b391012247fc9b6dd5b37c8 100644 (file)
@@ -88,7 +88,8 @@ si_emit_compute(struct radv_physical_device *physical_device,
        radeon_emit(cs, 0);
 
        radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
-       /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
+       /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
+        * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
 
@@ -102,6 +103,9 @@ si_emit_compute(struct radv_physical_device *physical_device,
                            S_00B858_SH1_CU_EN(0xffff));
        }
 
+       if (physical_device->rad_info.chip_class >= GFX10)
+               radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
+
        /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
         * and is now per pipe, so it should be handled in the
         * kernel if we want to use something other than the default value,
@@ -237,7 +241,11 @@ si_emit_graphics(struct radv_physical_device *physical_device,
                               S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                               S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
 
-       if (physical_device->rad_info.chip_class >= GFX9) {
+       if (physical_device->rad_info.chip_class >= GFX10) {
+               radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
+               radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
+               radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0);
+       } else if (physical_device->rad_info.chip_class >= GFX9) {
                radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
                radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
                radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
@@ -253,6 +261,19 @@ si_emit_graphics(struct radv_physical_device *physical_device,
        }
 
        if (physical_device->rad_info.chip_class >= GFX7) {
+               if (physical_device->rad_info.chip_class >= GFX10) {
+                       /* Logical CUs 16 - 31 */
+                       radeon_set_sh_reg(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
+                                         S_00B404_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
+                                         S_00B204_CU_EN(0xffff) |
+                                         S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
+                       radeon_set_sh_reg(cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
+                                         S_00B104_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
+                                         S_00B004_CU_EN(0xffff));
+               }
+
                if (physical_device->rad_info.chip_class >= GFX9) {
                        radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                                          S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
@@ -299,6 +320,30 @@ si_emit_graphics(struct radv_physical_device *physical_device,
                                  S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
        }
 
+       if (physical_device->rad_info.chip_class >= GFX10) {
+               radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+               radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
+                                      physical_device->rad_info.pa_sc_tile_steering_override);
+               radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
+                                      S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+                                      S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+                                      S_02807C_HTILE_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+                                      S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
+                                      S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) |
+                                      S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) |
+                                      S_02807C_HTILE_RD_POLICY(V_02807C_CACHE_NOA_RD));
+
+               radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
+                                      S_028410_CMASK_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+                                      S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+                                      S_028410_DCC_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+                                      S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) |
+                                      S_028410_CMASK_RD_POLICY(V_028410_CACHE_NOA_RD) |
+                                      S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_RD) |
+                                      S_028410_DCC_RD_POLICY(V_028410_CACHE_NOA_RD) |
+                                      S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD));
+       }
+
        if (physical_device->rad_info.chip_class >= GFX8) {
                uint32_t vgt_tess_distribution;
 
@@ -321,6 +366,7 @@ si_emit_graphics(struct radv_physical_device *physical_device,
        if (physical_device->rad_info.chip_class >= GFX9) {
                unsigned num_se = physical_device->rad_info.max_se;
                unsigned pc_lines = 0;
+               unsigned max_alloc_count = 0;
 
                switch (physical_device->rad_info.family) {
                case CHIP_VEGA10:
@@ -330,14 +376,25 @@ si_emit_graphics(struct radv_physical_device *physical_device,
                        break;
                case CHIP_RAVEN:
                case CHIP_RAVEN2:
+               case CHIP_NAVI10:
+               case CHIP_NAVI12:
                        pc_lines = 1024;
                        break;
+               case CHIP_NAVI14:
+                       pc_lines = 512;
+                       break;
                default:
                        assert(0);
                }
 
+               if (physical_device->rad_info.chip_class >= GFX10) {
+                       max_alloc_count = pc_lines / 3;
+               } else {
+                       max_alloc_count = MIN2(128, pc_lines / (4 * num_se));
+               }
+
                radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
-                                      S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
+                                      S_028C48_MAX_ALLOC_COUNT(max_alloc_count) |
                                       S_028C48_MAX_PRIM_PER_BATCH(1023));
                radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
                                       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
@@ -781,7 +838,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
        
        if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
                cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-       if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+       if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
                cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
        if (chip_class <= GFX8) {
@@ -859,16 +916,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                           EVENT_TC_MD_ACTION_ENA;
 
                /* Ideally flush TC together with CB/DB. */
-               if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+               if (flush_bits & RADV_CMD_FLAG_INV_L2) {
                        /* Writeback and invalidate everything in L2 & L1. */
                        tc_flags = EVENT_TC_ACTION_ENA |
                                   EVENT_TC_WB_ACTION_ENA;
 
 
                        /* Clear the flags. */
-                       flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                                        RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
-                                        RADV_CMD_FLAG_INV_VMEM_L1);
+                       flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
+                                        RADV_CMD_FLAG_WB_L2 |
+                                        RADV_CMD_FLAG_INV_VCACHE);
                }
                assert(flush_cnt);
                (*flush_cnt)++;
@@ -898,16 +955,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
         */
        if ((cp_coher_cntl ||
             (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                           RADV_CMD_FLAG_INV_VMEM_L1 |
-                           RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                           RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
+                           RADV_CMD_FLAG_INV_VCACHE |
+                           RADV_CMD_FLAG_INV_L2 |
+                           RADV_CMD_FLAG_WB_L2))) &&
            !is_mec) {
                radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
                radeon_emit(cs, 0);
        }
 
-       if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
-           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+       if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
                si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
                                    cp_coher_cntl |
                                    S_0085F0_TC_ACTION_ENA(1) |
@@ -915,7 +972,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                                    S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
                cp_coher_cntl = 0;
        } else {
-               if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+               if(flush_bits & RADV_CMD_FLAG_WB_L2) {
                        /* WB = write-back
                         * NC = apply to non-coherent MTYPEs
                         *      (i.e. MTYPE <= 1, which is what we use everywhere)
@@ -929,7 +986,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                                            S_0301F0_TC_NC_ACTION_ENA(1));
                        cp_coher_cntl = 0;
                }
-               if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
+               if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
                        si_emit_acquire_mem(cs, is_mec,
                                            chip_class >= GFX9,
                                            cp_coher_cntl |