radv: pass sample locations for transitions before depth/stencil resolves
[mesa.git] / src / amd / vulkan / si_cmd_buffer.c
index 56d7a9a36821af166e03de5b4a7f4bfdd1990b6f..126cabd390ae281d6bf46cc89a874597272f27d7 100644 (file)
@@ -31,7 +31,6 @@
 #include "radv_shader.h"
 #include "radv_cs.h"
 #include "sid.h"
-#include "gfx9d.h"
 #include "radv_util.h"
 #include "main/macros.h"
 
@@ -91,16 +90,16 @@ si_emit_compute(struct radv_physical_device *physical_device,
        radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
        /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
-       radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
+       radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
 
        if (physical_device->rad_info.chip_class >= GFX7) {
                /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
                radeon_set_sh_reg_seq(cs,
                                      R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
-               radeon_emit(cs, S_00B864_SH0_CU_EN(0xffff) |
-                           S_00B864_SH1_CU_EN(0xffff));
-               radeon_emit(cs, S_00B868_SH0_CU_EN(0xffff) |
-                           S_00B868_SH1_CU_EN(0xffff));
+               radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
+                           S_00B858_SH1_CU_EN(0xffff));
+               radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
+                           S_00B858_SH1_CU_EN(0xffff));
        }
 
        /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
@@ -197,9 +196,6 @@ si_emit_graphics(struct radv_physical_device *physical_device,
                radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
                                      S_008A14_CLIP_VTX_REORDER_ENA(1));
 
-       radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
-       radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
-
        if (!physical_device->has_clear_state)
                radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
@@ -992,6 +988,11 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
        if (unlikely(cmd_buffer->device->trace_bo))
                radv_cmd_buffer_trace_emit(cmd_buffer);
 
+       /* Clear the caches that have been flushed to avoid syncing too much
+        * when there is some pending active queries.
+        */
+       cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits;
+
        cmd_buffer->state.flush_bits = 0;
 
        /* If the driver used a compute shader for resetting a query pool, it
@@ -1315,16 +1316,19 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
 static const uint32_t sample_locs_1x =
        FILL_SREG(0, 0,   0, 0,   0, 0,   0, 0);
 static const unsigned max_dist_1x = 0;
+static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
 
 /* 2xMSAA */
 static const uint32_t sample_locs_2x =
        FILL_SREG(4,4,   -4, -4,   0, 0,   0, 0);
 static const unsigned max_dist_2x = 4;
+static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
 
 /* 4xMSAA */
 static const uint32_t sample_locs_4x =
        FILL_SREG(-2,-6,   6, -2,   -6, 2,  2, 6);
 static const unsigned max_dist_4x = 6;
+static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
 
 /* 8xMSAA */
 static const uint32_t sample_locs_8x[] = {
@@ -1335,7 +1339,8 @@ static const uint32_t sample_locs_8x[] = {
        0,
        0,
 };
-static const unsigned max_dist_8x = 8;
+static const unsigned max_dist_8x = 7;
+static const uint64_t centroid_priority_8x = 0x7654321076543210ull;
 
 unsigned radv_get_default_max_sample_dist(int log_samples)
 {
@@ -1353,24 +1358,36 @@ void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples
        switch (nr_samples) {
        default:
        case 1:
+               radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+               radeon_emit(cs, (uint32_t)centroid_priority_1x);
+               radeon_emit(cs, centroid_priority_1x >> 32);
                radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
                radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
                radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
                radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
                break;
        case 2:
+               radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+               radeon_emit(cs, (uint32_t)centroid_priority_2x);
+               radeon_emit(cs, centroid_priority_2x >> 32);
                radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
                radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
                radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
                radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
                break;
        case 4:
+               radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+               radeon_emit(cs, (uint32_t)centroid_priority_4x);
+               radeon_emit(cs, centroid_priority_4x >> 32);
                radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
                radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
                radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
                radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
                break;
        case 8:
+               radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+               radeon_emit(cs, (uint32_t)centroid_priority_8x);
+               radeon_emit(cs, centroid_priority_8x >> 32);
                radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
                radeon_emit_array(cs, sample_locs_8x, 4);
                radeon_emit_array(cs, sample_locs_8x, 4);