automake: Link all libGL.so variants with -Bsymbolic.
[mesa.git] / src / amd / vulkan / si_cmd_buffer.c
index 49c99bcab77892ffacf7d5eaaaee46071586a9ca..d94e23b975f2e38c5f74db2fcec5ebe48ca3478c 100644 (file)
@@ -297,6 +297,7 @@ si_emit_config(struct radv_physical_device *physical_device,
                raster_config_1 = 0x0000002a;
                break;
        case CHIP_POLARIS11:
+       case CHIP_POLARIS12:
                raster_config = 0x16000012;
                raster_config_1 = 0x00000000;
                break;
@@ -361,11 +362,6 @@ si_emit_config(struct radv_physical_device *physical_device,
        radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
        radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
 
-       radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
-       radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
-       radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
-       radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
-
        radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
        radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
        radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
@@ -415,16 +411,25 @@ si_emit_config(struct radv_physical_device *physical_device,
        }
 
        if (physical_device->rad_info.chip_class >= VI) {
+               uint32_t vgt_tess_distribution;
                radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
                                       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
                                       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
-               radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+               if (physical_device->rad_info.family < CHIP_POLARIS10)
+                       radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
                radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+
+               vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
+                       S_028B50_ACCUM_TRI(11) |
+                       S_028B50_ACCUM_QUAD(11) |
+                       S_028B50_DONUT_SPLIT(16);
+
+               if (physical_device->rad_info.family == CHIP_FIJI ||
+                   physical_device->rad_info.family >= CHIP_POLARIS10)
+                       vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
+
                radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
-                                      S_028B50_ACCUM_ISOLINE(32) |
-                                      S_028B50_ACCUM_TRI(11) |
-                                      S_028B50_ACCUM_QUAD(11) |
-                                      S_028B50_DONUT_SPLIT(16));
+                                      vgt_tess_distribution);
        } else {
                radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
                radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
@@ -506,21 +511,7 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
 {
        int i;
 
-       if (count == 0) {
-               radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
-               radeon_emit(cs, fui(1.0));
-               radeon_emit(cs, fui(0.0));
-               radeon_emit(cs, fui(1.0));
-               radeon_emit(cs, fui(0.0));
-               radeon_emit(cs, fui(1.0));
-               radeon_emit(cs, fui(0.0));
-
-               radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
-               radeon_emit(cs, fui(0.0));
-               radeon_emit(cs, fui(1.0));
-
-               return;
-       }
+       assert(count);
        radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
                                   first_vp * 4 * 6, count * 6);
 
@@ -547,22 +538,75 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
        }
 }
 
+/*
+ * Derive the rectangle covered by a viewport so it can be used as a scissor.
+ *
+ * The rectangle spans translate - |scale| to translate + |scale| on X and Y,
+ * where scale/translate are the values get_viewport_xform() reports for the
+ * viewport. The origin is converted to the type of the offset fields on
+ * assignment; the far edge is rounded up with ceilf() before the extent is
+ * derived from it.
+ */
+static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
+{
+       float scale[3], translate[3];
+       VkRect2D rect;
+
+       get_viewport_xform(viewport, scale, translate);
+
+       /* fabsf(), not abs(): abs() takes an int, so a float scale would be
+        * truncated to a whole number before its magnitude is taken. */
+       rect.offset.x = translate[0] - fabsf(scale[0]);
+       rect.offset.y = translate[1] - fabsf(scale[1]);
+       rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
+       rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
+
+       return rect;
+}
+
+/*
+ * Return the overlap of rectangles a and b.
+ *
+ * On each axis the result starts at the larger of the two origins and ends
+ * at the smaller of the two far edges (offset + extent); the extent is the
+ * distance between those two points.
+ */
+static VkRect2D si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) {
+       VkRect2D overlap;
+
+       /* X axis: origin first, then the extent measured from that origin. */
+       overlap.offset.x = MAX2(a->offset.x, b->offset.x);
+       overlap.extent.width = MIN2(a->offset.x + a->extent.width,
+                                   b->offset.x + b->extent.width) - overlap.offset.x;
+
+       /* Y axis: same computation. */
+       overlap.offset.y = MAX2(a->offset.y, b->offset.y);
+       overlap.extent.height = MIN2(a->offset.y + a->extent.height,
+                                    b->offset.y + b->extent.height) - overlap.offset.y;
+
+       return overlap;
+}
+
+/*
+ * Emit the per-viewport scissor rectangles and the guardband clip registers.
+ *
+ * cs:                command stream the register writes are emitted into.
+ * first:             index of the first viewport scissor register pair written.
+ * count:             number of scissors/viewports to process; must be non-zero.
+ * scissors:          scissor rectangles; entry i is intersected with the
+ *                    rectangle derived from viewports[i] before being emitted.
+ * viewports:         viewports used both for that intersection and for the
+ *                    guardband computation.
+ * can_use_guardband: when false, both guardband factors are forced to 1.0.
+ *
+ * The guardband factor on each axis is the minimum over all processed
+ * viewports of (max_range - |translate|) / |scale|.
+ */
 void
 si_write_scissors(struct radeon_winsys_cs *cs, int first,
-                  int count, const VkRect2D *scissors)
+                  int count, const VkRect2D *scissors,
+                  const VkViewport *viewports, bool can_use_guardband)
 {
        int i;
-       if (count == 0)
-               return;
+       float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
+       const float max_range = 32767.0f;
+       assert(count);
 
        radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
        for (i = 0; i < count; i++) {
-               radeon_emit(cs, S_028250_TL_X(scissors[i].offset.x) |
-                           S_028250_TL_Y(scissors[i].offset.y) |
+               VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
+               VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
+
+               get_viewport_xform(viewports + i, scale, translate);
+               /* fabsf(), not abs(): abs() takes an int and would truncate
+                * the float scale/translate before taking the magnitude. */
+               scale[0] = fabsf(scale[0]);
+               scale[1] = fabsf(scale[1]);
+
+               /* scale is used as a divisor below; keep it at least 0.5. */
+               if (scale[0] < 0.5)
+                       scale[0] = 0.5;
+               if (scale[1] < 0.5)
+                       scale[1] = 0.5;
+
+               guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
+               guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
+
+               radeon_emit(cs, S_028250_TL_X(scissor.offset.x) |
+                           S_028250_TL_Y(scissor.offset.y) |
                            S_028250_WINDOW_OFFSET_DISABLE(1));
-               radeon_emit(cs, S_028254_BR_X(scissors[i].offset.x + scissors[i].extent.width) |
-                           S_028254_BR_Y(scissors[i].offset.y + scissors[i].extent.height));
+               radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
+                           S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
+       }
+       if (!can_use_guardband) {
+               guardband_x = 1.0;
+               guardband_y = 1.0;
        }
+
+       /* Four consecutive registers starting at PA_CL_GB_VERT_CLIP_ADJ:
+        * vertical clip, vertical discard, horizontal clip, horizontal
+        * discard. The discard adjustments stay at 1.0. */
+       radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+       radeon_emit(cs, fui(guardband_y));
+       radeon_emit(cs, fui(1.0));
+       radeon_emit(cs, fui(guardband_x));
+       radeon_emit(cs, fui(1.0));
 }
 
 static inline unsigned
@@ -600,13 +644,42 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
        uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
        bool multi_instances_smaller_than_primgroup;
 
-       if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+       if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+               primgroup_size = cmd_buffer->state.pipeline->graphics.tess.num_patches;
+       else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
                primgroup_size = 64;  /* recommended with a GS */
 
        multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw &&
                                                                   num_prims < primgroup_size);
-       /* TODO TES */
+       if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
+               /* SWITCH_ON_EOI must be set if PrimID is used. */
+               if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
+                   cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id)
+                       ia_switch_on_eoi = true;
+
+               /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+               if ((family == CHIP_TAHITI ||
+                    family == CHIP_PITCAIRN ||
+                    family == CHIP_BONAIRE) &&
+                   radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+                       partial_vs_wave = true;
 
+               /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+               if (cmd_buffer->device->has_distributed_tess) {
+                       if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
+                               partial_es_wave = true;
+
+                               if (family == CHIP_TONGA ||
+                                   family == CHIP_FIJI ||
+                                   family == CHIP_POLARIS10 ||
+                                   family == CHIP_POLARIS11 ||
+                                   family == CHIP_POLARIS12)
+                                       partial_vs_wave = true;
+                       } else {
+                               partial_vs_wave = true;
+                       }
+               }
+       }
        /* TODO linestipple */
 
        if (chip_class >= CIK) {
@@ -951,6 +1024,16 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
        radv_cmd_buffer_trace_emit(cmd_buffer);
 }
 
+/*
+ * Issue a CP DMA copy of a buffer range onto itself with CIK_CP_DMA_USE_L2.
+ *
+ * The byte range [va, va + size) is widened outwards to CP_DMA_ALIGNMENT
+ * boundaries, and the widened range is passed as both source and
+ * destination of the copy.
+ * NOTE(review): presumably the self-copy exists to pull the range into L2,
+ * as the function name suggests -- confirm against the CP DMA documentation.
+ */
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+                        unsigned size)
+{
+       const uint64_t align_mask = ~(CP_DMA_ALIGNMENT - 1);
+       /* Round the start down and the end up to the alignment. */
+       const uint64_t start = va & align_mask;
+       const uint64_t end = (va + size + CP_DMA_ALIGNMENT - 1) & align_mask;
+
+       si_emit_cp_dma_copy_buffer(cmd_buffer, start, start,
+                                  end - start, CIK_CP_DMA_USE_L2);
+}
+
 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
                              uint64_t remaining_size, unsigned *flags)
 {