X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_shader.h;h=bfd2787a1234f26e2292df0e1340b2f12bb9229e;hb=2dd8dfd9137ac561aac3c453c1c7ad6683bd17b4;hp=40e92b52f38c3f1f1c4a67f867b232d47e75a9c5;hpb=9d0d806332a32cd60b4f53fe805650751001d169;p=mesa.git diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 40e92b52f38..bfd2787a123 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -46,6 +46,8 @@ // Match MAX_SETS from radv_descriptor_set.h #define RADV_UD_MAX_SETS MAX_SETS +#define RADV_NUM_PHYSICAL_VGPRS 256 + struct radv_shader_module { struct nir_shader *nir; unsigned char sha1[20]; @@ -53,16 +55,38 @@ struct radv_shader_module { char data[0]; }; +enum { + RADV_ALPHA_ADJUST_NONE = 0, + RADV_ALPHA_ADJUST_SNORM = 1, + RADV_ALPHA_ADJUST_SINT = 2, + RADV_ALPHA_ADJUST_SSCALED = 3, +}; + struct radv_vs_variant_key { uint32_t instance_rate_inputs; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; + + /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. + * so we may need to fix it up. */ + uint64_t alpha_adjust; + + /* For some formats the channels have to be shuffled. */ + uint32_t post_shuffle; + uint32_t as_es:1; uint32_t as_ls:1; uint32_t export_prim_id:1; + uint32_t export_layer_id:1; }; struct radv_tes_variant_key { uint32_t as_es:1; uint32_t export_prim_id:1; + uint32_t export_layer_id:1; uint8_t num_patches; uint8_t tcs_num_outputs; }; @@ -78,10 +102,9 @@ struct radv_tcs_variant_key { struct radv_fs_variant_key { uint32_t col_format; uint8_t log2_ps_iter_samples; - uint8_t log2_num_samples; + uint8_t num_samples; uint32_t is_int8; uint32_t is_int10; - uint32_t multisample : 1; }; struct radv_shader_variant_key { @@ -103,21 +126,24 @@ struct radv_nir_compiler_options { bool dump_shader; bool dump_preoptir; bool record_llvm_ir; + bool check_ir; enum radeon_family family; enum chip_class chip_class; uint32_t tess_offchip_block_dw_size; + uint32_t address32_hi; }; enum radv_ud_index { AC_UD_SCRATCH_RING_OFFSETS = 0, AC_UD_PUSH_CONSTANTS = 1, - AC_UD_INDIRECT_DESCRIPTOR_SETS = 2, - AC_UD_VIEW_INDEX = 3, - AC_UD_SHADER_START = 4, + AC_UD_INLINE_PUSH_CONSTANTS = 2, + AC_UD_INDIRECT_DESCRIPTOR_SETS = 3, + AC_UD_VIEW_INDEX = 4, + AC_UD_STREAMOUT_BUFFERS = 5, + AC_UD_SHADER_START = 6, AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, AC_UD_VS_BASE_VERTEX_START_INSTANCE, AC_UD_VS_MAX_UD, - AC_UD_PS_SAMPLE_POS_OFFSET = AC_UD_SHADER_START, AC_UD_PS_MAX_UD, AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, AC_UD_CS_MAX_UD, @@ -126,8 +152,31 @@ enum radv_ud_index { AC_UD_TES_MAX_UD, AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, }; + +struct radv_stream_output { + uint8_t location; + uint8_t buffer; + uint16_t offset; + uint8_t component_mask; + uint8_t stream; +}; + +struct radv_streamout_info { + uint16_t num_outputs; + struct radv_stream_output outputs[MAX_SO_OUTPUTS]; + uint16_t strides[MAX_SO_BUFFERS]; + uint32_t enabled_stream_buffers_mask; +}; + struct radv_shader_info { bool loads_push_constants; + bool loads_dynamic_offsets; + uint8_t min_push_constant_used; + uint8_t max_push_constant_used; + bool has_only_32bit_push_constants; + bool has_indirect_push_constants; + uint8_t num_inline_push_consts; + uint8_t base_inline_push_consts; uint32_t desc_set_used_mask; bool needs_multiview_view_index; bool uses_invocation_id; @@ -140,6 +189,12 @@ struct radv_shader_info { bool needs_draw_id; bool needs_instance_id; } vs; + struct { + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + uint8_t num_stream_output_components[4]; + uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; + uint8_t max_stream; + } gs; struct { uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; } tes; @@ -154,6 +209,7 @@ struct radv_shader_info { bool has_pcoord; bool prim_id_input; bool layer_input; + uint8_t num_input_clips_culls; } ps; struct { bool uses_grid_size; @@ -165,18 +221,19 @@ struct radv_shader_info { uint64_t outputs_written; uint64_t patch_outputs_written; } tcs; + + struct radv_streamout_info so; }; struct radv_userdata_info { int8_t sgpr_idx; uint8_t num_sgprs; - bool indirect; - uint32_t indirect_offset; }; struct radv_userdata_locations { struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS]; struct radv_userdata_info shader_data[AC_UD_MAX_UD]; + uint32_t descriptor_sets_enabled; }; struct radv_vs_output_info { @@ -215,6 +272,7 @@ struct radv_shader_variant_info { unsigned num_interp; uint32_t input_mask; uint32_t flat_shaded_mask; + uint32_t float16_shaded_mask; bool can_discard; bool early_fragment_test; } fs; @@ -277,14 +335,20 @@ struct radv_shader_slab { }; void -radv_optimize_nir(struct nir_shader *shader); +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies); +bool +radv_nir_lower_ycbcr_textures(nir_shader *shader, + const struct radv_pipeline_layout *layout); nir_shader * radv_shader_compile_to_nir(struct radv_device *device, struct radv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, - const VkSpecializationInfo *spec_info); + const VkSpecializationInfo *spec_info, + const VkPipelineCreateFlags flags, + const struct radv_pipeline_layout *layout); void * radv_alloc_shader_memory(struct radv_device *device, @@ -323,11 +387,14 @@ radv_shader_dump_stats(struct radv_device *device, static inline bool radv_can_dump_shader(struct radv_device *device, - struct radv_shader_module *module) + struct radv_shader_module *module, + bool is_gs_copy_shader) { + if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) + return false; + /* Only dump non-meta shaders, useful for debugging purposes. */ - return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS && - module && !module->nir; + return (module && !module->nir) || is_gs_copy_shader; } static inline bool @@ -354,6 +421,8 @@ static inline unsigned shader_io_get_unique_index(gl_varying_slot slot) return 1; if (slot == VARYING_SLOT_CLIP_DIST0) return 2; + if (slot == VARYING_SLOT_CLIP_DIST1) + return 3; /* 3 is reserved for clip dist as well */ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) return 4 + (slot - VARYING_SLOT_VAR0);