radv/gfx9: reduce the number of input VGPRs for the GS stage
[mesa.git] / src / amd / vulkan / radv_shader.c
index c9edb28ba2491e99b2de2e670830ec1f47b339b7..31879805ae08d8dfb3b3dbb2c3086722d67f2e4d 100644 (file)
@@ -196,19 +196,22 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                        spec_entries[i].data32 = *(const uint32_t *)data;
                        }
                }
-               const struct nir_spirv_supported_extensions supported_ext = {
-                       .draw_parameters = true,
-                       .float64 = true,
-                       .image_read_without_format = true,
-                       .image_write_without_format = true,
-                       .tessellation = true,
-                       .int64 = true,
-                       .multiview = true,
-                       .variable_pointers = true,
+               const struct spirv_to_nir_options spirv_options = {
+                       .caps = {
+                               .draw_parameters = true,
+                               .float64 = true,
+                               .image_read_without_format = true,
+                               .image_write_without_format = true,
+                               .tessellation = true,
+                               .int64 = true,
+                               .multiview = true,
+                               .variable_pointers = true,
+                       },
                };
                entry_point = spirv_to_nir(spirv, module->size / 4,
                                           spec_entries, num_spec_entries,
-                                          stage, entrypoint_name, &supported_ext, &nir_options);
+                                          stage, entrypoint_name,
+                                          &spirv_options, &nir_options);
                nir = entry_point->shader;
                assert(nir->info.stage == stage);
                nir_validate_shader(nir);
@@ -389,13 +392,18 @@ radv_fill_shader_variant(struct radv_device *device,
                break;
        case MESA_SHADER_FRAGMENT:
                break;
-       case MESA_SHADER_COMPUTE:
+       case MESA_SHADER_COMPUTE: {
+               struct ac_shader_info *info = &variant->info.info;
                variant->rsrc2 |=
-                       S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
-                       S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
-                       S_00B84C_TG_SIZE_EN(1) |
+                       S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+                       S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+                       S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+                       S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 :
+                                               info->cs.uses_thread_id[1] ? 1 : 0) |
+                       S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
                        S_00B84C_LDS_SIZE(variant->config.lds_size);
                break;
+       }
        default:
                unreachable("unsupported shader type");
                break;
@@ -403,8 +411,21 @@ radv_fill_shader_variant(struct radv_device *device,
 
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            stage == MESA_SHADER_GEOMETRY) {
+               struct ac_shader_info *info = &variant->info.info;
+               unsigned gs_vgpr_comp_cnt;
+
+               /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
+                * VGPR[0:4] are always loaded.
+                */
+               if (info->uses_invocation_id)
+                       gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+               else if (info->uses_prim_id)
+                       gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+               else
+                       gs_vgpr_comp_cnt = 1; /* TODO: derive from input_prim instead of hard-coding 1 */
+
                /* TODO: Figure out how many we actually need. */
-               variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3);
+               variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt);
                variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) |
                                  S_00B22C_OC_LDS_EN(1);
        } else if (device->physical_device->rad_info.chip_class >= GFX9 &&
@@ -429,7 +450,7 @@ shader_variant_create(struct radv_device *device,
                      unsigned *code_size_out)
 {
        enum radeon_family chip_family = device->physical_device->rad_info.family;
-       bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
+       bool dump_shaders = radv_can_dump_shader(device, module);
        enum ac_target_machine_options tm_options = 0;
        struct radv_shader_variant *variant;
        struct ac_shader_binary binary;
@@ -541,45 +562,6 @@ radv_shader_variant_destroy(struct radv_device *device,
        free(variant);
 }
 
-uint32_t
-radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
-                                bool has_gs, bool has_tess)
-{
-       switch (stage) {
-       case MESA_SHADER_FRAGMENT:
-               return R_00B030_SPI_SHADER_USER_DATA_PS_0;
-       case MESA_SHADER_VERTEX:
-               if (chip_class >= GFX9) {
-                       return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
-                              has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
-                              R_00B130_SPI_SHADER_USER_DATA_VS_0;
-               }
-               if (has_tess)
-                       return R_00B530_SPI_SHADER_USER_DATA_LS_0;
-               else
-                       return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
-       case MESA_SHADER_GEOMETRY:
-               return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
-                                           R_00B230_SPI_SHADER_USER_DATA_GS_0;
-       case MESA_SHADER_COMPUTE:
-               return R_00B900_COMPUTE_USER_DATA_0;
-       case MESA_SHADER_TESS_CTRL:
-               return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
-                                           R_00B430_SPI_SHADER_USER_DATA_HS_0;
-       case MESA_SHADER_TESS_EVAL:
-               if (chip_class >= GFX9) {
-                       return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
-                              R_00B130_SPI_SHADER_USER_DATA_VS_0;
-               }
-               if (has_gs)
-                       return R_00B330_SPI_SHADER_USER_DATA_ES_0;
-               else
-                       return R_00B130_SPI_SHADER_USER_DATA_VS_0;
-       default:
-               unreachable("unknown shader");
-       }
-}
-
 const char *
 radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
 {
@@ -680,7 +662,7 @@ radv_shader_dump_stats(struct radv_device *device,
        generate_shader_stats(device, variant, stage, buf);
 
        fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
-       fprintf(file, buf->buf);
+       fprintf(file, "%s", buf->buf);
 
        _mesa_string_buffer_destroy(buf);
 }
@@ -703,7 +685,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
        /* Spec doesn't indicate what to do if the stage is invalid, so just
         * return no info for this. */
        if (!variant)
-               return VK_ERROR_FEATURE_NOT_PRESENT;
+               return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
 
        switch (infoType) {
        case VK_SHADER_INFO_TYPE_STATISTICS_AMD: