X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_shader.h;h=bfd2787a1234f26e2292df0e1340b2f12bb9229e;hb=2dd8dfd9137ac561aac3c453c1c7ad6683bd17b4;hp=b0517b73a42878c6ea7f1a3e656debbd34405636;hpb=2cfba40eea4c3b5529ad14d4786c96013a416f30;p=mesa.git diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b0517b73a42..bfd2787a123 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -46,6 +46,8 @@ // Match MAX_SETS from radv_descriptor_set.h #define RADV_UD_MAX_SETS MAX_SETS +#define RADV_NUM_PHYSICAL_VGPRS 256 + struct radv_shader_module { struct nir_shader *nir; unsigned char sha1[20]; @@ -53,16 +55,185 @@ struct radv_shader_module { char data[0]; }; +enum { + RADV_ALPHA_ADJUST_NONE = 0, + RADV_ALPHA_ADJUST_SNORM = 1, + RADV_ALPHA_ADJUST_SINT = 2, + RADV_ALPHA_ADJUST_SSCALED = 3, +}; + +struct radv_vs_variant_key { + uint32_t instance_rate_inputs; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; + uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; + + /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. + * so we may need to fix it up. */ + uint64_t alpha_adjust; + + /* For some formats the channels have to be shuffled. */ + uint32_t post_shuffle; + + uint32_t as_es:1; + uint32_t as_ls:1; + uint32_t export_prim_id:1; + uint32_t export_layer_id:1; +}; + +struct radv_tes_variant_key { + uint32_t as_es:1; + uint32_t export_prim_id:1; + uint32_t export_layer_id:1; + uint8_t num_patches; + uint8_t tcs_num_outputs; +}; + +struct radv_tcs_variant_key { + struct radv_vs_variant_key vs_key; + unsigned primitive_mode; + unsigned input_vertices; + unsigned num_inputs; + uint32_t tes_reads_tess_factors:1; +}; + +struct radv_fs_variant_key { + uint32_t col_format; + uint8_t log2_ps_iter_samples; + uint8_t num_samples; + uint32_t is_int8; + uint32_t is_int10; +}; + +struct radv_shader_variant_key { + union { + struct radv_vs_variant_key vs; + struct radv_fs_variant_key fs; + struct radv_tes_variant_key tes; + struct radv_tcs_variant_key tcs; + }; + bool has_multiview_view_index; +}; + +struct radv_nir_compiler_options { + struct radv_pipeline_layout *layout; + struct radv_shader_variant_key key; + bool unsafe_math; + bool supports_spill; + bool clamp_shadow_reference; + bool dump_shader; + bool dump_preoptir; + bool record_llvm_ir; + bool check_ir; + enum radeon_family family; + enum chip_class chip_class; + uint32_t tess_offchip_block_dw_size; + uint32_t address32_hi; +}; + +enum radv_ud_index { + AC_UD_SCRATCH_RING_OFFSETS = 0, + AC_UD_PUSH_CONSTANTS = 1, + AC_UD_INLINE_PUSH_CONSTANTS = 2, + AC_UD_INDIRECT_DESCRIPTOR_SETS = 3, + AC_UD_VIEW_INDEX = 4, + AC_UD_STREAMOUT_BUFFERS = 5, + AC_UD_SHADER_START = 6, + AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, + AC_UD_VS_BASE_VERTEX_START_INSTANCE, + AC_UD_VS_MAX_UD, + AC_UD_PS_MAX_UD, + AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, + AC_UD_CS_MAX_UD, + AC_UD_GS_MAX_UD, + AC_UD_TCS_MAX_UD, + AC_UD_TES_MAX_UD, + AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, +}; + +struct radv_stream_output { + uint8_t location; + uint8_t buffer; + uint16_t offset; + uint8_t component_mask; + uint8_t stream; +}; + +struct radv_streamout_info { + uint16_t num_outputs; + struct radv_stream_output outputs[MAX_SO_OUTPUTS]; + uint16_t strides[MAX_SO_BUFFERS]; + uint32_t enabled_stream_buffers_mask; +}; + +struct radv_shader_info { + bool loads_push_constants; + bool loads_dynamic_offsets; + uint8_t min_push_constant_used; + uint8_t max_push_constant_used; + bool has_only_32bit_push_constants; + bool has_indirect_push_constants; + uint8_t num_inline_push_consts; + uint8_t base_inline_push_consts; + uint32_t desc_set_used_mask; + bool needs_multiview_view_index; + bool uses_invocation_id; + bool uses_prim_id; + struct { + uint64_t ls_outputs_written; + uint8_t input_usage_mask[VERT_ATTRIB_MAX]; + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + bool has_vertex_buffers; /* needs vertex buffers and base/start */ + bool needs_draw_id; + bool needs_instance_id; + } vs; + struct { + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + uint8_t num_stream_output_components[4]; + uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; + uint8_t max_stream; + } gs; + struct { + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + } tes; + struct { + bool force_persample; + bool needs_sample_positions; + bool uses_input_attachments; + bool writes_memory; + bool writes_z; + bool writes_stencil; + bool writes_sample_mask; + bool has_pcoord; + bool prim_id_input; + bool layer_input; + uint8_t num_input_clips_culls; + } ps; + struct { + bool uses_grid_size; + bool uses_block_id[3]; + bool uses_thread_id[3]; + bool uses_local_invocation_idx; + } cs; + struct { + uint64_t outputs_written; + uint64_t patch_outputs_written; + } tcs; + + struct radv_streamout_info so; +}; + struct radv_userdata_info { int8_t sgpr_idx; uint8_t num_sgprs; - bool indirect; - uint32_t indirect_offset; }; struct radv_userdata_locations { struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS]; struct radv_userdata_info shader_data[AC_UD_MAX_UD]; + uint32_t descriptor_sets_enabled; }; struct radv_vs_output_info { @@ -83,7 +254,7 @@ struct radv_es_output_info { struct radv_shader_variant_info { struct radv_userdata_locations user_sgprs_locs; - struct ac_shader_info info; + struct radv_shader_info info; unsigned num_user_sgprs; unsigned num_input_sgprs; unsigned num_input_vgprs; @@ -96,12 +267,12 @@ struct radv_shader_variant_info { unsigned vgpr_comp_cnt; bool as_es; bool as_ls; - uint64_t outputs_written; } vs; struct { unsigned num_interp; uint32_t input_mask; uint32_t flat_shaded_mask; + uint32_t float16_shaded_mask; bool can_discard; bool early_fragment_test; } fs; @@ -119,11 +290,8 @@ struct radv_shader_variant_info { } gs; struct { unsigned tcs_vertices_out; - /* Which outputs are actually written */ - uint64_t outputs_written; - /* Which patch outputs are actually written */ - uint32_t patch_outputs_written; - + uint32_t num_patches; + uint32_t lds_size; } tcs; struct { struct radv_vs_output_info outinfo; @@ -153,6 +321,7 @@ struct radv_shader_variant { uint32_t spirv_size; struct nir_shader *nir; char *disasm_string; + char *llvm_ir_string; struct list_head slab_list; }; @@ -166,14 +335,20 @@ struct radv_shader_slab { }; void -radv_optimize_nir(struct nir_shader *shader); +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies); +bool +radv_nir_lower_ycbcr_textures(nir_shader *shader, + const struct radv_pipeline_layout *layout); nir_shader * radv_shader_compile_to_nir(struct radv_device *device, struct radv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, - const VkSpecializationInfo *spec_info); + const VkSpecializationInfo *spec_info, + const VkPipelineCreateFlags flags, + const struct radv_pipeline_layout *layout); void * radv_alloc_shader_memory(struct radv_device *device, @@ -188,7 +363,7 @@ radv_shader_variant_create(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, struct radv_pipeline_layout *layout, - const struct ac_shader_variant_key *key, + const struct radv_shader_variant_key *key, void **code_out, unsigned *code_size_out); @@ -212,11 +387,14 @@ radv_shader_dump_stats(struct radv_device *device, static inline bool radv_can_dump_shader(struct radv_device *device, - struct radv_shader_module *module) + struct radv_shader_module *module, + bool is_gs_copy_shader) { + if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) + return false; + /* Only dump non-meta shaders, useful for debugging purposes. */ - return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS && - module && !module->nir; + return (module && !module->nir) || is_gs_copy_shader; } static inline bool @@ -228,4 +406,27 @@ radv_can_dump_shader_stats(struct radv_device *device, module && !module->nir; } +static inline unsigned shader_io_get_unique_index(gl_varying_slot slot) +{ + /* handle patch indices separate */ + if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) + return 0; + if (slot == VARYING_SLOT_TESS_LEVEL_INNER) + return 1; + if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) + return 2 + (slot - VARYING_SLOT_PATCH0); + if (slot == VARYING_SLOT_POS) + return 0; + if (slot == VARYING_SLOT_PSIZ) + return 1; + if (slot == VARYING_SLOT_CLIP_DIST0) + return 2; + if (slot == VARYING_SLOT_CLIP_DIST1) + return 3; + /* 3 is reserved for clip dist as well */ + if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) + return 4 + (slot - VARYING_SLOT_VAR0); + unreachable("illegal slot in get unique index\n"); +} + #endif