radv: add support for shader stats dump

author Dave Airlie <airlied@redhat.com>

Tue, 22 Nov 2016 04:17:49 +0000 (04:17 +0000)

committer Dave Airlie <airlied@redhat.com>

Tue, 22 Nov 2016 07:20:17 +0000 (07:20 +0000)
author Dave Airlie <airlied@redhat.com>
Tue, 22 Nov 2016 04:17:49 +0000 (04:17 +0000)
committer Dave Airlie <airlied@redhat.com>
Tue, 22 Nov 2016 07:20:17 +0000 (07:20 +0000)
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c

index 85959735d299f64f4244c1e3bd1a5d9f5ddbabbc..00f3a07367e340bf88d04dad515f22cd7034c2e9 100644 (file)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -666,6 +666,7 @@ VkResult radv_CreateDevice(
         }
         device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
         device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false);
+       device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false);
  
         if (device->allow_fast_clears && device->allow_dcc)
                 radv_finishme("DCC fast clears have not been tested\n");
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c

index 404e840901575c6e5ac8cd94a83b656dbc91481c..7d7d0c672f92893c1619ed5b0e1dd9cea7b26b4c 100644 (file)
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -257,6 +257,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
         return nir;
  }
  
+/**
+ * Return a constant, human-readable label for a shader stage.
+ *
+ * Vertex, fragment and compute stages have dedicated labels; every other
+ * stage value yields "Unknown shader". The variant argument is accepted
+ * but not consulted.
+ */
+static const char *radv_get_shader_name(struct radv_shader_variant *var,
+                                       gl_shader_stage stage)
+{
+       const char *label = "Unknown shader";
+
+       if (stage == MESA_SHADER_VERTEX)
+               label = "Vertex Shader as VS";
+       else if (stage == MESA_SHADER_FRAGMENT)
+               label = "Pixel Shader";
+       else if (stage == MESA_SHADER_COMPUTE)
+               label = "Compute Shader";
+
+       return label;
+}
+/**
+ * Print per-stage shader statistics for a pipeline to stderr.
+ *
+ * For every non-NULL entry of pipeline->shaders this writes the stage
+ * name, the SPI_PS_INPUT_* configuration (fragment stage only) and the
+ * register, spill, code size, LDS and scratch values recorded for the
+ * variant, followed by an estimate of the maximum waves per SIMD.
+ *
+ * The wave estimate is computed independently for each stage, so the
+ * figure printed for one shader is not lowered by the register or LDS
+ * usage of a stage visited earlier in the loop.
+ *
+ * @param device    device owning the pipeline; only its chip class is read.
+ * @param pipeline  pipeline whose shader variants are reported.
+ */
+static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+       unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
+       FILE *file = stderr;
+       int i;
+
+       for (i = 0; i < MESA_SHADER_STAGES; i++) {
+               struct radv_shader_variant *var = pipeline->shaders[i];
+               struct ac_shader_config *conf;
+               /* Reset the limits for every stage so that one shader's
+                * resource usage cannot reduce another shader's result.
+                */
+               unsigned max_simd_waves = 10;
+               unsigned lds_per_wave = 0;
+
+               if (!var)
+                       continue;
+
+               conf = &var->config;
+
+               /* Only the fragment stage contributes an LDS footprint here:
+                * the configured LDS size scaled by the allocation increment,
+                * plus 48 per interpolated input rounded up to the increment.
+                */
+               if (i == MESA_SHADER_FRAGMENT) {
+                       lds_per_wave = conf->lds_size * lds_increment +
+                               align(var->info.fs.num_interp * 48, lds_increment);
+               }
+
+               /* SGPR limit: a budget of 800 on VI and newer, 512 otherwise. */
+               if (conf->num_sgprs) {
+                       if (device->instance->physicalDevice.rad_info.chip_class >= VI)
+                               max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+                       else
+                               max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+               }
+
+               /* VGPR limit: a budget of 256 shared between waves. */
+               if (conf->num_vgprs)
+                       max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+               /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+                * that PS can use.
+                */
+               if (lds_per_wave)
+                       max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+               fprintf(file, "\n%s:\n",
+                       radv_get_shader_name(var, i));
+               if (i == MESA_SHADER_FRAGMENT) {
+                       fprintf(file, "*** SHADER CONFIG ***\n"
+                               "SPI_PS_INPUT_ADDR = 0x%04x\n"
+                               "SPI_PS_INPUT_ENA  = 0x%04x\n",
+                               conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+               }
+               fprintf(file, "*** SHADER STATS ***\n"
+                       "SGPRS: %d\n"
+                       "VGPRS: %d\n"
+                       "Spilled SGPRs: %d\n"
+                       "Spilled VGPRs: %d\n"
+                       "Code Size: %d bytes\n"
+                       "LDS: %d blocks\n"
+                       "Scratch: %d bytes per wave\n"
+                       "Max Waves: %d\n"
+                       "********************\n\n\n",
+                       conf->num_sgprs, conf->num_vgprs,
+                       conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
+                       conf->lds_size, conf->scratch_bytes_per_wave,
+                       max_simd_waves);
+       }
+}
+
  void radv_shader_variant_destroy(struct radv_device *device,
                                   struct radv_shader_variant *variant)
  {
@@ -297,6 +372,7 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
                               &variant->info, shader, &options, dump);
         LLVMDisposeTargetMachine(tm);
  
+       variant->code_size = binary.code_size;
         bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
         unsigned vgpr_comp_cnt = 0;
  
@@ -1319,6 +1395,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
                 pipeline->binding_stride[desc->binding] = desc->stride;
         }
  
+       if (device->shader_stats_dump) {
+               radv_dump_pipeline_stats(device, pipeline);
+       }
+
         return VK_SUCCESS;
  }
  
@@ -1412,6 +1492,10 @@ static VkResult radv_compute_pipeline_create(
                                        pipeline->layout, NULL, dump);
  
         *pPipeline = radv_pipeline_to_handle(pipeline);
+
+       if (device->shader_stats_dump) {
+               radv_dump_pipeline_stats(device, pipeline);
+       }
         return VK_SUCCESS;
  }
  VkResult radv_CreateComputePipelines(
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h

index 3d17895a1181da72f3bfe64ce734e089e027475d..a5d13a9239a9c3cde86b58c5dba8e727ad002879 100644 (file)
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -444,6 +444,7 @@ struct radv_device {
  
         bool allow_fast_clears;
         bool allow_dcc;
+       bool shader_stats_dump;
  
         /* MSAA sample locations.
          * The first index is the sample index.
@@ -794,6 +795,7 @@ struct radv_shader_variant {
         struct ac_shader_variant_info info;
         unsigned rsrc1;
         unsigned rsrc2;
+       uint32_t code_size;
  };
  
  struct radv_depth_stencil_state {
author	Dave Airlie <airlied@redhat.com>
	Tue, 22 Nov 2016 04:17:49 +0000 (04:17 +0000)
committer	Dave Airlie <airlied@redhat.com>
	Tue, 22 Nov 2016 07:20:17 +0000 (07:20 +0000)
src/amd/vulkan/radv_device.c		patch \| blob \| history
src/amd/vulkan/radv_pipeline.c		patch \| blob \| history
src/amd/vulkan/radv_private.h		patch \| blob \| history