anv: use derefs for shared memory access
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 5 Jun 2019 13:14:23 +0000 (14:14 +0100)
committer Jason Ekstrand <jason@jlekstrand.net>
Thu, 8 Aug 2019 17:10:39 +0000 (12:10 -0500)
vkpipeline-db for my Skylake GPU:
total instructions in shared programs: 8847602 -> 8847896 (<.01%)
instructions in affected programs: 10165 -> 10459 (2.89%)
helped: 8
HURT: 2

total cycles in shared programs: 1606273555 -> 1606251634 (<.01%)
cycles in affected programs: 2201803 -> 2179882 (-1.00%)
helped: 7
HURT: 3

The shaders with more instructions are caused by a loop over a shared array
in Three Kingdoms being unrolled (which creates a lot of nested ifs). Not sure
if that's good or bad.

One of the shaders with worse cycles is only worse by 0.04% and the other
two are the shaders with loops unrolled.

v2: add patch
v4: don't set spirv_options.shared_addr_format
v4: move comment concerning the shared address format used and NULL
v4: add vkpipeline-db results
v5: rename to nir_lower_vars_to_explicit_types
v5: move setting of total_shared to outside brw_compile_cs
v6: set shared_addr_format
v6: formatting changes

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> (v5)
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/intel/vulkan/anv_pipeline.c

index 38b05380c1b0e949b0b9fa1105a4cc1a6d561467..c6fad1334f45251f178eea5a4d1b59fe9584e11d 100644 (file)
@@ -166,7 +166,6 @@ anv_shader_compile_to_nir(struct anv_device *device,
       .module = module,
    };
    struct spirv_to_nir_options spirv_options = {
-      .lower_workgroup_access_to_offsets = true,
       .frag_coord_is_sysval = true,
       .caps = {
          .demote_to_helper_invocation = true,
@@ -663,9 +662,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
 
-   if (nir->info.stage == MESA_SHADER_COMPUTE)
-      prog_data->total_shared = nir->num_shared;
-
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
    if (nir->num_uniforms > 0) {
@@ -1353,6 +1349,18 @@ fail:
    return result;
 }
 
+static void
+shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   /* Outputs the byte size and alignment of a scalar or vector type. */
+   assert(glsl_type_is_vector_or_scalar(type));
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8;   /* booleans count as 4 bytes */
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size * (length == 3 ? 4 : length); /* vec3 aligns as vec4 */
+}
+
 VkResult
 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
@@ -1430,6 +1438,13 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
       NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
                  &stage.prog_data.cs);
 
+      NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
+                 nir_var_mem_shared, shared_type_info);
+      NIR_PASS_V(stage.nir, nir_lower_explicit_io,
+                 nir_var_mem_shared, nir_address_format_32bit_offset);
+
+      stage.prog_data.cs.base.total_shared = stage.nir->info.cs.shared_size;
+
       const unsigned *shader_code =
          brw_compile_cs(compiler, pipeline->device, mem_ctx, &stage.key.cs,
                         &stage.prog_data.cs, stage.nir, -1, NULL);