radv/gfx10: Implement bin size calculation.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 20 Jul 2019 23:38:13 +0000 (01:38 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 23 Jul 2019 19:26:59 +0000 (21:26 +0200)
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_pipeline.c

index a54d307b563e5ed6092821ae3ecbc8e3015bb3bd..7446096b7ba5fd3f5538454e2c4601bcf33695e7 100644 (file)
@@ -2705,7 +2705,7 @@ struct radv_bin_size_entry {
 };
 
 static VkExtent2D
-radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_gfx9_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
 {
        static const struct radv_bin_size_entry color_size_table[][3][9] = {
                {
@@ -2975,6 +2975,110 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
        return extent;
 }
 
+/**
+ * Compute the binning bin size for a graphics pipeline on GFX10.
+ *
+ * A candidate extent is derived from each of three per-pixel costs (color,
+ * FMASK and depth/stencil) by dividing the corresponding tag capacity by the
+ * bytes needed per pixel and splitting the resulting power-of-two pixel count
+ * into a width and a height. The candidate with the smallest area wins, and
+ * the result is clamped to at least 128x64.
+ *
+ * \param pipeline     pipeline being built; provides the chip family, the
+ *                     render backend count and the MSAA configuration.
+ * \param pCreateInfo  create info; provides the render pass, the subpass index
+ *                     and the color blend state.
+ * \return the bin extent in pixels.
+ */
+static VkExtent2D
+radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+       RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+       struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+       VkExtent2D extent = {512, 512};
+
+       unsigned sdp_interface_count;
+
+       switch(pipeline->device->physical_device->rad_info.family) {
+       case CHIP_NAVI10:
+       case CHIP_NAVI12:
+               sdp_interface_count = 16;
+               break;
+       case CHIP_NAVI14:
+               sdp_interface_count = 8;
+               break;
+       default:
+               unreachable("Unhandled GFX10 chip");
+       }
+
+       const unsigned db_tag_size = 64;
+       const unsigned db_tag_count = 312;
+       const unsigned color_tag_size = 1024;
+       const unsigned color_tag_count = 31;
+       const unsigned fmask_tag_size = 256;
+       const unsigned fmask_tag_count = 44;
+
+       const unsigned rb_count = pipeline->device->physical_device->rad_info.num_render_backends;
+       const unsigned pipe_count = MAX2(rb_count, sdp_interface_count);
+
+       /* The integer division happens before the multiplication by
+        * pipe_count, so each value is rounded down to a multiple of
+        * (tag_size * pipe_count).
+        */
+       const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
+       const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
+       const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+
+       const unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+       const unsigned samples_log = util_logbase2_ceil(total_samples);
+
+       unsigned color_bytes_per_pixel = 0;
+       unsigned fmask_bytes_per_pixel = 0;
+
+       const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
+       if (vkblend) {
+               for (unsigned i = 0; i < subpass->color_count; i++) {
+                       /* Attachments that are never written or not bound
+                        * do not contribute to the per-pixel cost.
+                        */
+                       if (!vkblend->pAttachments[i].colorWriteMask)
+                               continue;
+
+                       if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+                               continue;
+
+                       VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+                       color_bytes_per_pixel += vk_format_get_blocksize(format);
+
+                       if (total_samples > 1) {
+                               /* NOTE(review): the table only covers
+                                * samples_log <= 3 (up to 8 samples);
+                                * confirm larger counts cannot reach here.
+                                */
+                               const unsigned fmask_array[] = {0, 1, 1, 4};
+                               fmask_bytes_per_pixel += fmask_array[samples_log];
+                       }
+               }
+
+               color_bytes_per_pixel *= total_samples;
+       }
+       /* Avoid dividing by zero when no color attachment is written. */
+       color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
+
+       /* Split the power-of-two pixel count into width and height, rounding
+        * the width exponent up and the height exponent down.
+        */
+       const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
+       extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
+       extent.height = 1ull << (color_pixel_count_log / 2);
+
+       if (fmask_bytes_per_pixel) {
+               const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
+
+               /* Both dimensions come from the FMASK pixel count so that
+                * the area comparison below reflects the FMASK limit.
+                */
+               const VkExtent2D fmask_extent = (VkExtent2D){
+                       .width = 1ull << ((fmask_pixel_count_log + 1) / 2),
+                       .height = 1ull << (fmask_pixel_count_log / 2)
+               };
+
+               if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
+                       extent = fmask_extent;
+       }
+
+       if (subpass->depth_stencil_attachment) {
+               struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+               /* Coefficients taken from AMDVLK */
+               unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
+               unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
+               /* NOTE(review): this is zero (and the division below is
+                * undefined) if the format has neither depth nor stencil;
+                * presumably impossible for a depth/stencil attachment -
+                * confirm.
+                */
+               unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
+
+               const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
+
+               /* Both dimensions come from the depth/stencil pixel count so
+                * that the area comparison below reflects the DB limit.
+                */
+               const VkExtent2D db_extent = (VkExtent2D){
+                       .width = 1ull << ((db_pixel_count_log + 1) / 2),
+                       .height = 1ull << (db_pixel_count_log / 2)
+               };
+
+               if (db_extent.width * db_extent.height < extent.width * extent.height)
+                       extent = db_extent;
+       }
+
+       /* Enforce the minimum bin size on each axis independently. */
+       extent.width = MAX2(extent.width, 128);
+       extent.height = MAX2(extent.height, 64);
+
+       return extent;
+}
+
 static void
 radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
                                              struct radv_pipeline *pipeline,
@@ -3027,7 +3131,13 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
        if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
                return;
 
-       VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
+       VkExtent2D bin_size;
+       if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+               bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
+       } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
+               bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
+       } else
+               unreachable("Unhandled generation for binning bin size calculation");
 
        if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
                unsigned context_states_per_bin; /* allowed range: [1, 6] */