X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_nir.c;h=ce05143e559e337f327a56074760cb003b910f47;hp=5ca4e178b86f635b223cf764df40ad43289ee578;hb=76cf43c4f88d263d7973da14136210f7c5b198ea;hpb=f818d9ae3c89f858e93ea0ea3293103b97599e8f diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 5ca4e178b86..ce05143e559 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -22,1245 +22,741 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "si_shader_internal.h" -#include "si_pipe.h" - #include "ac_nir_to_llvm.h" - -#include "tgsi/tgsi_from_mesa.h" - #include "compiler/nir/nir.h" -#include "compiler/nir_types.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_deref.h" +#include "compiler/nir_types.h" +#include "si_pipe.h" +#include "si_shader_internal.h" +#include "tgsi/tgsi_from_mesa.h" -static nir_variable* tex_get_texture_var(nir_tex_instr *instr) -{ - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_texture_deref: - return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)); - default: - break; - } - } - - return NULL; -} - -static nir_variable* intrinsic_get_var(nir_intrinsic_instr *instr) -{ - return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0])); -} - -static void gather_intrinsic_load_deref_input_info(const nir_shader *nir, - const nir_intrinsic_instr *instr, - nir_variable *var, - struct tgsi_shader_info *info) -{ - assert(var && var->data.mode == nir_var_shader_in); - - switch (nir->info.stage) { - case MESA_SHADER_VERTEX: { - unsigned i = var->data.driver_location; - unsigned attrib_count = glsl_count_attribute_slots(var->type, false); - uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); - - for (unsigned j = 0; j < attrib_count; j++, i++) { - if (glsl_type_is_64bit(glsl_without_array(var->type))) { - unsigned dmask = mask; - - if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2) - dmask >>= 2; - - dmask <<= var->data.location_frac / 2; - - if (dmask & 0x1) - info->input_usage_mask[i] |= TGSI_WRITEMASK_XY; - if (dmask & 0x2) - info->input_usage_mask[i] |= TGSI_WRITEMASK_ZW; - } else { - info->input_usage_mask[i] |= - (mask << var->data.location_frac) & 0xf; - } - } - break; - } - case MESA_SHADER_FRAGMENT: - if (var->data.location == VARYING_SLOT_COL0 || - var->data.location == VARYING_SLOT_COL1) { - unsigned index = var->data.location == VARYING_SLOT_COL1; - uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); - info->colors_read |= mask << (index * 4); - } - break; - default:; - } -} - -static void gather_intrinsic_load_deref_output_info(const nir_shader *nir, - const nir_intrinsic_instr *instr, - nir_variable *var, - struct tgsi_shader_info *info) +static const nir_deref_instr *tex_get_texture_deref(nir_tex_instr *instr) { - assert(var && var->data.mode == nir_var_shader_out); - - switch (nir->info.stage) { - case MESA_SHADER_TESS_CTRL: - if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || - var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) - info->reads_tessfactor_outputs = true; - else if (var->data.patch) - info->reads_perpatch_outputs = true; - else - info->reads_pervertex_outputs = true; - break; - - case MESA_SHADER_FRAGMENT: - if (var->data.fb_fetch_output) - info->uses_fbfetch = true; - break; - default:; - } + for (unsigned i = 0; i < instr->num_srcs; 
i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_texture_deref: + return nir_src_as_deref(instr->src[i].src); + default: + break; + } + } + + return NULL; } -static void gather_intrinsic_store_deref_output_info(const nir_shader *nir, - const nir_intrinsic_instr *instr, - nir_variable *var, - struct tgsi_shader_info *info) +static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr, + bool is_input) { - assert(var && var->data.mode == nir_var_shader_out); - - switch (nir->info.stage) { - case MESA_SHADER_VERTEX: /* needed by LS, ES */ - case MESA_SHADER_TESS_EVAL: /* needed by ES */ - case MESA_SHADER_GEOMETRY: { - unsigned i = var->data.driver_location; - unsigned attrib_count = glsl_count_attribute_slots(var->type, false); - unsigned mask = nir_intrinsic_write_mask(instr); - - assert(!var->data.compact); - - for (unsigned j = 0; j < attrib_count; j++, i++) { - if (glsl_type_is_64bit(glsl_without_array(var->type))) { - unsigned dmask = mask; - - if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2) - dmask >>= 2; - - dmask <<= var->data.location_frac / 2; - - if (dmask & 0x1) - info->output_usagemask[i] |= TGSI_WRITEMASK_XY; - if (dmask & 0x2) - info->output_usagemask[i] |= TGSI_WRITEMASK_ZW; - } else { - info->output_usagemask[i] |= - (mask << var->data.location_frac) & 0xf; - } - - } - break; - } - default:; - } + unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ + + if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { + nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); + + if (baryc) { + if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) + interp = nir_intrinsic_interp_mode(baryc); + else + unreachable("unknown barycentric intrinsic"); + } else { + unreachable("unknown barycentric expression"); + } + } + + unsigned mask, bit_size; + bool dual_slot, is_output_load; + + if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) { + mask = nir_intrinsic_write_mask(intr); /* store */ + bit_size = nir_src_bit_size(intr->src[0]); + dual_slot = bit_size == 64 && nir_src_num_components(intr->src[0]) >= 3; + is_output_load = false; + } else { + mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ + bit_size = intr->dest.ssa.bit_size; + dual_slot = bit_size == 64 && intr->dest.ssa.num_components >= 3; + is_output_load = !is_input; + } + + /* Convert the 64-bit component mask to a 32-bit component mask. */ + if (bit_size == 64) { + unsigned new_mask = 0; + for (unsigned i = 0; i < 4; i++) { + if (mask & (1 << i)) + new_mask |= 0x3 << (2 * i); + } + mask = new_mask; + } + + /* Convert the 16-bit component mask to a 32-bit component mask. */ + if (bit_size == 16) { + unsigned new_mask = 0; + for (unsigned i = 0; i < 4; i++) { + if (mask & (1 << i)) + new_mask |= 0x1 << (i / 2); + } + mask = new_mask; + } + + mask <<= nir_intrinsic_component(intr); + + nir_src offset = *nir_get_io_offset_src(intr); + bool indirect = !nir_src_is_const(offset); + if (!indirect) + assert(nir_src_as_uint(offset) == 0); + + unsigned semantic = 0; + /* VS doesn't have semantics. */ + if (info->stage != MESA_SHADER_VERTEX || !is_input) + semantic = nir_intrinsic_io_semantics(intr).location; + + if (info->stage == MESA_SHADER_FRAGMENT && !is_input) { + /* Never use FRAG_RESULT_COLOR directly. 
*/ + if (semantic == FRAG_RESULT_COLOR) { + semantic = FRAG_RESULT_DATA0; + info->color0_writes_all_cbufs = true; + } + semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; + } + + unsigned driver_location = nir_intrinsic_base(intr); + unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : (1 + dual_slot); + + if (is_input) { + assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask)); + + for (unsigned i = 0; i < num_slots; i++) { + unsigned loc = driver_location + i; + unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf; + + info->input_semantic[loc] = semantic + i; + info->input_interpolate[loc] = interp; + + if (slot_mask) { + info->input_usage_mask[loc] |= slot_mask; + info->num_inputs = MAX2(info->num_inputs, loc + 1); + + if (semantic == VARYING_SLOT_PRIMITIVE_ID) + info->uses_primid = true; + } + } + } else { + /* Outputs. */ + assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); + assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot)); + + for (unsigned i = 0; i < num_slots; i++) { + unsigned loc = driver_location + i; + unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf; + + info->output_semantic[loc] = semantic + i; + info->output_semantic_to_slot[semantic + i] = loc; + + if (is_output_load) { + /* Output loads have only a few things that we need to track. */ + info->output_readmask[loc] |= slot_mask; + + if (info->stage == MESA_SHADER_FRAGMENT && + nir_intrinsic_io_semantics(intr).fb_fetch_output) + info->uses_fbfetch = true; + } else if (slot_mask) { + /* Output stores. */ + if (info->stage == MESA_SHADER_GEOMETRY) { + unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams << + (nir_intrinsic_component(intr) * 2); + unsigned new_mask = slot_mask & ~info->output_usagemask[loc]; + + for (unsigned i = 0; i < 4; i++) { + unsigned stream = (gs_streams >> (i * 2)) & 0x3; + + if (new_mask & (1 << i)) { + info->output_streams[loc] |= stream << (i * 2); + info->num_stream_output_components[stream]++; + } + } + } + + info->output_usagemask[loc] |= slot_mask; + info->num_outputs = MAX2(info->num_outputs, loc + 1); + + if (info->stage == MESA_SHADER_FRAGMENT) { + switch (semantic) { + case FRAG_RESULT_DEPTH: + info->writes_z = true; + break; + case FRAG_RESULT_STENCIL: + info->writes_stencil = true; + break; + case FRAG_RESULT_SAMPLE_MASK: + info->writes_samplemask = true; + break; + default: + if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) { + unsigned index = semantic - FRAG_RESULT_DATA0; + info->colors_written |= 1 << (index + i); + } + break; + } + } else { + switch (semantic) { + case VARYING_SLOT_PRIMITIVE_ID: + info->writes_primid = true; + break; + case VARYING_SLOT_VIEWPORT: + info->writes_viewport_index = true; + break; + case VARYING_SLOT_LAYER: + info->writes_layer = true; + break; + case VARYING_SLOT_PSIZ: + info->writes_psize = true; + break; + case VARYING_SLOT_CLIP_VERTEX: + info->writes_clipvertex = true; + break; + case VARYING_SLOT_EDGE: + info->writes_edgeflag = true; + break; + case VARYING_SLOT_POS: + info->writes_position = true; + break; + } + } + } + } + } } -static void scan_instruction(const struct nir_shader *nir, - struct tgsi_shader_info *info, - nir_instr *instr) +static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info, + nir_instr *instr) { - if (instr->type == nir_instr_type_alu) { - nir_alu_instr *alu = nir_instr_as_alu(instr); - - switch (alu->op) { - case 
nir_op_fddx: - case nir_op_fddy: - case nir_op_fddx_fine: - case nir_op_fddy_fine: - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - info->uses_derivatives = true; - break; - default: - break; - } - } else if (instr->type == nir_instr_type_tex) { - nir_tex_instr *tex = nir_instr_as_tex(instr); - nir_variable *texture = tex_get_texture_var(tex); - - if (!texture) { - info->samplers_declared |= - u_bit_consecutive(tex->sampler_index, 1); - } else { - if (texture->data.bindless) - info->uses_bindless_samplers = true; - } - - switch (tex->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_lod: - info->uses_derivatives = true; - break; - default: - break; - } - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - switch (intr->intrinsic) { - case nir_intrinsic_load_front_face: - info->uses_frontface = 1; - break; - case nir_intrinsic_load_instance_id: - info->uses_instanceid = 1; - break; - case nir_intrinsic_load_invocation_id: - info->uses_invocationid = true; - break; - case nir_intrinsic_load_num_work_groups: - info->uses_grid_size = true; - break; - case nir_intrinsic_load_local_group_size: - /* The block size is translated to IMM with a fixed block size. */ - if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) - info->uses_block_size = true; - break; - case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_work_group_id: { - unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); - while (mask) { - unsigned i = u_bit_scan(&mask); - - if (intr->intrinsic == nir_intrinsic_load_work_group_id) - info->uses_block_id[i] = true; - else - info->uses_thread_id[i] = true; - } - break; - } - case nir_intrinsic_load_vertex_id: - info->uses_vertexid = 1; - break; - case nir_intrinsic_load_vertex_id_zero_base: - info->uses_vertexid_nobase = 1; - break; - case nir_intrinsic_load_base_vertex: - info->uses_basevertex = 1; - break; - case nir_intrinsic_load_draw_id: - info->uses_drawid = 1; - break; - case nir_intrinsic_load_primitive_id: - info->uses_primid = 1; - break; - case nir_intrinsic_load_sample_mask_in: - info->reads_samplemask = true; - break; - case nir_intrinsic_load_tess_level_inner: - case nir_intrinsic_load_tess_level_outer: - info->reads_tess_factors = true; - break; - case nir_intrinsic_bindless_image_load: - info->uses_bindless_images = true; - - if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_load = true; - else - info->uses_bindless_image_load = true; - break; - case nir_intrinsic_bindless_image_size: - case nir_intrinsic_bindless_image_samples: - info->uses_bindless_images = true; - break; - case nir_intrinsic_bindless_image_store: - info->uses_bindless_images = true; - - if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_store = true; - else - info->uses_bindless_image_store = true; - - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ - break; - case nir_intrinsic_image_deref_store: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ - break; - case nir_intrinsic_bindless_image_atomic_add: - case nir_intrinsic_bindless_image_atomic_min: - case nir_intrinsic_bindless_image_atomic_max: - case nir_intrinsic_bindless_image_atomic_and: - case nir_intrinsic_bindless_image_atomic_or: - case nir_intrinsic_bindless_image_atomic_xor: - case nir_intrinsic_bindless_image_atomic_exchange: - case 
nir_intrinsic_bindless_image_atomic_comp_swap: - info->uses_bindless_images = true; - - if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) - info->uses_bindless_buffer_atomic = true; - else - info->uses_bindless_image_atomic = true; - - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ - break; - case nir_intrinsic_store_ssbo: - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ - break; - case nir_intrinsic_load_deref: { - nir_variable *var = intrinsic_get_var(intr); - nir_variable_mode mode = var->data.mode; - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(var->type)); - - if (mode == nir_var_shader_in) { - gather_intrinsic_load_deref_input_info(nir, intr, var, info); - - switch (var->data.interpolation) { - case INTERP_MODE_NONE: - if (glsl_base_type_is_integer(base_type)) - break; - - /* fall-through */ - case INTERP_MODE_SMOOTH: - if (var->data.sample) - info->uses_persp_sample = true; - else if (var->data.centroid) - info->uses_persp_centroid = true; - else - info->uses_persp_center = true; - break; - - case INTERP_MODE_NOPERSPECTIVE: - if (var->data.sample) - info->uses_linear_sample = true; - else if (var->data.centroid) - info->uses_linear_centroid = true; - else - info->uses_linear_center = true; - break; - } - } else if (mode == nir_var_shader_out) { - gather_intrinsic_load_deref_output_info(nir, intr, var, info); - } - break; - } - case nir_intrinsic_store_deref: { - nir_variable *var = intrinsic_get_var(intr); - - if (var->data.mode == nir_var_shader_out) - gather_intrinsic_store_deref_output_info(nir, intr, var, info); - break; - } - case nir_intrinsic_interp_deref_at_centroid: - case nir_intrinsic_interp_deref_at_sample: - case nir_intrinsic_interp_deref_at_offset: { - enum glsl_interp_mode interp = intrinsic_get_var(intr)->data.interpolation; - switch (interp) { - case INTERP_MODE_SMOOTH: - case INTERP_MODE_NONE: - if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid) - info->uses_persp_opcode_interp_centroid = true; - else if (intr->intrinsic == nir_intrinsic_interp_deref_at_sample) - info->uses_persp_opcode_interp_sample = true; - else - info->uses_persp_opcode_interp_offset = true; - break; - case INTERP_MODE_NOPERSPECTIVE: - if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid) - info->uses_linear_opcode_interp_centroid = true; - else if (intr->intrinsic == nir_intrinsic_interp_deref_at_sample) - info->uses_linear_opcode_interp_sample = true; - else - info->uses_linear_opcode_interp_offset = true; - break; - case INTERP_MODE_FLAT: - break; - default: - unreachable("Unsupported 
interpoation type"); - } - break; - } - default: - break; - } - } + if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_fine: + case nir_op_fddy_fine: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + info->uses_derivatives = true; + break; + default: + break; + } + } else if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + const nir_deref_instr *deref = tex_get_texture_deref(tex); + nir_variable *var = deref ? nir_deref_instr_get_variable(deref) : NULL; + + if (var) { + if (deref->mode != nir_var_uniform || var->data.bindless) + info->uses_bindless_samplers = true; + } + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + info->uses_derivatives = true; + break; + default: + break; + } + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_front_face: + info->uses_frontface = 1; + break; + case nir_intrinsic_load_instance_id: + info->uses_instanceid = 1; + break; + case nir_intrinsic_load_invocation_id: + info->uses_invocationid = true; + break; + case nir_intrinsic_load_num_work_groups: + info->uses_grid_size = true; + break; + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_subgroup_id: + case nir_intrinsic_load_num_subgroups: + info->uses_subgroup_info = true; + break; + case nir_intrinsic_load_local_group_size: + /* The block size is translated to IMM with a fixed block size. */ + if (info->base.cs.local_size[0] == 0) + info->uses_block_size = true; + break; + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_work_group_id: { + unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa); + while (mask) { + unsigned i = u_bit_scan(&mask); + + if (intr->intrinsic == nir_intrinsic_load_work_group_id) + info->uses_block_id[i] = true; + else + info->uses_thread_id[i] = true; + } + break; + } + case nir_intrinsic_load_draw_id: + info->uses_drawid = 1; + break; + case nir_intrinsic_load_primitive_id: + info->uses_primid = 1; + break; + case nir_intrinsic_load_sample_mask_in: + info->reads_samplemask = true; + break; + case nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer: + info->reads_tess_factors = true; + break; + case nir_intrinsic_bindless_image_load: + case nir_intrinsic_bindless_image_size: + case nir_intrinsic_bindless_image_samples: + info->uses_bindless_images = true; + break; + case nir_intrinsic_bindless_image_store: + info->uses_bindless_images = true; + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_image_deref_store: + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_imin: + case nir_intrinsic_bindless_image_atomic_umin: + case nir_intrinsic_bindless_image_atomic_imax: + case nir_intrinsic_bindless_image_atomic_umax: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: + info->uses_bindless_images = true; + info->writes_memory = true; + info->num_memory_instructions++; /* 
we only care about stores */ + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_inc_wrap: + case nir_intrinsic_image_deref_atomic_dec_wrap: + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_store_ssbo: + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + info->writes_memory = true; + info->num_memory_instructions++; /* we only care about stores */ + break; + case nir_intrinsic_load_color0: + case nir_intrinsic_load_color1: { + unsigned index = intr->intrinsic == nir_intrinsic_load_color1; + uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa); + info->colors_read |= mask << (index * 4); + break; + } + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_offset: /* uses center */ + case nir_intrinsic_load_barycentric_at_sample: { /* uses center */ + unsigned mode = nir_intrinsic_interp_mode(intr); + + if (mode == INTERP_MODE_FLAT) + break; + + if (mode == INTERP_MODE_NOPERSPECTIVE) { + if (intr->intrinsic == nir_intrinsic_load_barycentric_sample) + info->uses_linear_sample = true; + else if (intr->intrinsic == nir_intrinsic_load_barycentric_centroid) + info->uses_linear_centroid = true; + else + info->uses_linear_center = true; + } else { + if (intr->intrinsic == nir_intrinsic_load_barycentric_sample) + info->uses_persp_sample = true; + else if (intr->intrinsic == nir_intrinsic_load_barycentric_centroid) + info->uses_persp_centroid = true; + else + info->uses_persp_center = true; + } + if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) + info->uses_interp_at_sample = true; + break; + } + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_interpolated_input: + scan_io_usage(info, intr, true); + break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + scan_io_usage(info, intr, false); + break; + case nir_intrinsic_load_deref: + case nir_intrinsic_store_deref: + case nir_intrinsic_interp_deref_at_centroid: + case nir_intrinsic_interp_deref_at_sample: + case nir_intrinsic_interp_deref_at_offset: + unreachable("these opcodes should have been lowered"); + break; + default: + break; + } + } } -void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - struct tgsi_tessctrl_info *out) +void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *info) { - memset(out, 0, sizeof(*out)); - - if (nir->info.stage != MESA_SHADER_TESS_CTRL) - return; - - 
out->tessfactors_are_def_in_all_invocs = - ac_are_tessfactors_def_in_all_invocs(nir); + nir_function *func; + + info->base = nir->info; + info->stage = nir->info.stage; + + if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + if (info->base.tess.primitive_mode == GL_ISOLINES) + info->base.tess.primitive_mode = GL_LINES; + } + + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + /* post_depth_coverage implies early_fragment_tests */ + info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage; + + info->color_interpolate[0] = nir->info.fs.color0_interp; + info->color_interpolate[1] = nir->info.fs.color1_interp; + for (unsigned i = 0; i < 2; i++) { + if (info->color_interpolate[i] == INTERP_MODE_NONE) + info->color_interpolate[i] = INTERP_MODE_COLOR; + } + + info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : + nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : + TGSI_INTERPOLATE_LOC_CENTER; + info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : + nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : + TGSI_INTERPOLATE_LOC_CENTER; + } + + info->constbuf0_num_slots = nir->num_uniforms; + + if (info->stage == MESA_SHADER_FRAGMENT) + info->uses_kill = nir->info.fs.uses_discard; + + if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir); + } + + memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot)); + + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); + nir_foreach_block (block, func->impl) { + nir_foreach_instr (instr, block) + scan_instruction(nir, info, instr); + } + + /* Add color inputs to the list of inputs. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + for (unsigned i = 0; i < 2; i++) { + if ((info->colors_read >> (i * 4)) & 0xf) { + info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i; + info->input_interpolate[info->num_inputs] = info->color_interpolate[i]; + info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4); + info->num_inputs++; + } + } + } + + /* Trim output read masks based on write masks. 
*/ + for (unsigned i = 0; i < info->num_outputs; i++) + info->output_readmask[i] &= info->output_usagemask[i]; } -void si_nir_scan_shader(const struct nir_shader *nir, - struct tgsi_shader_info *info) +static void si_nir_opts(struct nir_shader *nir, bool first) { - nir_function *func; - unsigned i; - - info->processor = pipe_shader_type_from_mesa(nir->info.stage); - info->num_tokens = 2; /* indicate that the shader is non-empty */ - info->num_instructions = 2; - - info->properties[TGSI_PROPERTY_NEXT_SHADER] = - pipe_shader_type_from_mesa(nir->info.next_stage); - - if (nir->info.stage == MESA_SHADER_VERTEX) { - info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] = - nir->info.vs.window_space_position; - } - - if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] = - nir->info.tess.tcs_vertices_out; - } - - if (nir->info.stage == MESA_SHADER_TESS_EVAL) { - if (nir->info.tess.primitive_mode == GL_ISOLINES) - info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES; - else - info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode; - - STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL); - STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 == - PIPE_TESS_SPACING_FRACTIONAL_ODD); - STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 == - PIPE_TESS_SPACING_FRACTIONAL_EVEN); - - info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3; - info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw; - info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode; - } - - if (nir->info.stage == MESA_SHADER_GEOMETRY) { - info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive; - info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive; - info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out; - info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations; - } - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] = - nir->info.fs.early_fragment_tests | nir->info.fs.post_depth_coverage; - info->properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE] = nir->info.fs.post_depth_coverage; - - if (nir->info.fs.pixel_center_integer) { - info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] = - TGSI_FS_COORD_PIXEL_CENTER_INTEGER; - } - - if (nir->info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) { - switch (nir->info.fs.depth_layout) { - case FRAG_DEPTH_LAYOUT_ANY: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_ANY; - break; - case FRAG_DEPTH_LAYOUT_GREATER: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_GREATER; - break; - case FRAG_DEPTH_LAYOUT_LESS: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_LESS; - break; - case FRAG_DEPTH_LAYOUT_UNCHANGED: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_UNCHANGED; - break; - default: - unreachable("Unknow depth layout"); - } - } - } - - if (gl_shader_stage_is_compute(nir->info.stage)) { - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; - } - - i = 0; - uint64_t processed_inputs = 0; - unsigned num_inputs = 0; - nir_foreach_variable(variable, &nir->inputs) { - unsigned semantic_name, 
semantic_index; - - const struct glsl_type *type = variable->type; - if (nir_is_per_vertex_io(variable, nir->info.stage)) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - - unsigned attrib_count = glsl_count_attribute_slots(type, - nir->info.stage == MESA_SHADER_VERTEX); - - i = variable->data.driver_location; - - /* Vertex shader inputs don't have semantics. The state - * tracker has already mapped them to attributes via - * variable->data.driver_location. - */ - if (nir->info.stage == MESA_SHADER_VERTEX) { - if (glsl_type_is_dual_slot(glsl_without_array(variable->type))) - num_inputs++; - - num_inputs++; - continue; - } - - for (unsigned j = 0; j < attrib_count; j++, i++) { - - if (processed_inputs & ((uint64_t)1 << i)) - continue; - - processed_inputs |= ((uint64_t)1 << i); - num_inputs++; - - tgsi_get_gl_varying_semantic(variable->data.location + j, true, - &semantic_name, &semantic_index); - - info->input_semantic_name[i] = semantic_name; - info->input_semantic_index[i] = semantic_index; - - if (semantic_name == TGSI_SEMANTIC_PRIMID) - info->uses_primid = true; - - if (semantic_name == TGSI_SEMANTIC_COLOR) { - /* We only need this for color inputs. */ - if (variable->data.sample) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; - else if (variable->data.centroid) - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID; - else - info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER; - } - - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(variable->type)); - - switch (variable->data.interpolation) { - case INTERP_MODE_NONE: - if (glsl_base_type_is_integer(base_type)) { - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } - - if (semantic_name == TGSI_SEMANTIC_COLOR) { - info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; - break; - } - /* fall-through */ - - case INTERP_MODE_SMOOTH: - assert(!glsl_base_type_is_integer(base_type)); - - info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - - case INTERP_MODE_NOPERSPECTIVE: - assert(!glsl_base_type_is_integer(base_type)); - - info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; - break; - - case INTERP_MODE_FLAT: - info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; - break; - } - } - } - - info->num_inputs = num_inputs; - - i = 0; - uint64_t processed_outputs = 0; - unsigned num_outputs = 0; - nir_foreach_variable(variable, &nir->outputs) { - unsigned semantic_name, semantic_index; - - i = variable->data.driver_location; - - const struct glsl_type *type = variable->type; - if (nir_is_per_vertex_io(variable, nir->info.stage)) { - assert(glsl_type_is_array(type)); - type = glsl_get_array_element(type); - } - - unsigned attrib_count = glsl_count_attribute_slots(type, false); - for (unsigned k = 0; k < attrib_count; k++, i++) { - - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - tgsi_get_gl_frag_result_semantic(variable->data.location + k, - &semantic_name, &semantic_index); - - /* Adjust for dual source blending */ - if (variable->data.index > 0) { - semantic_index++; - } - } else { - tgsi_get_gl_varying_semantic(variable->data.location + k, true, - &semantic_name, &semantic_index); - } - - unsigned num_components = 4; - unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type)); - if (vector_elements) - num_components = vector_elements; - - unsigned component = variable->data.location_frac; - if (glsl_type_is_64bit(glsl_without_array(variable->type))) { - if 
(glsl_type_is_dual_slot(glsl_without_array(variable->type)) && k % 2) { - num_components = (num_components * 2) - 4; - component = 0; - } else { - num_components = MIN2(num_components * 2, 4); - } - } - - ubyte usagemask = 0; - for (unsigned j = component; j < num_components + component; j++) { - switch (j) { - case 0: - usagemask |= TGSI_WRITEMASK_X; - break; - case 1: - usagemask |= TGSI_WRITEMASK_Y; - break; - case 2: - usagemask |= TGSI_WRITEMASK_Z; - break; - case 3: - usagemask |= TGSI_WRITEMASK_W; - break; - default: - unreachable("error calculating component index"); - } - } - - unsigned gs_out_streams; - if (variable->data.stream & (1u << 31)) { - gs_out_streams = variable->data.stream & ~(1u << 31); - } else { - assert(variable->data.stream < 4); - gs_out_streams = 0; - for (unsigned j = 0; j < num_components; ++j) - gs_out_streams |= variable->data.stream << (2 * (component + j)); - } - - unsigned streamx = gs_out_streams & 3; - unsigned streamy = (gs_out_streams >> 2) & 3; - unsigned streamz = (gs_out_streams >> 4) & 3; - unsigned streamw = (gs_out_streams >> 6) & 3; - - if (usagemask & TGSI_WRITEMASK_X) { - info->output_streams[i] |= streamx; - info->num_stream_output_components[streamx]++; - } - if (usagemask & TGSI_WRITEMASK_Y) { - info->output_streams[i] |= streamy << 2; - info->num_stream_output_components[streamy]++; - } - if (usagemask & TGSI_WRITEMASK_Z) { - info->output_streams[i] |= streamz << 4; - info->num_stream_output_components[streamz]++; - } - if (usagemask & TGSI_WRITEMASK_W) { - info->output_streams[i] |= streamw << 6; - info->num_stream_output_components[streamw]++; - } - - /* make sure we only count this location once against - * the num_outputs counter. - */ - if (processed_outputs & ((uint64_t)1 << i)) - continue; - - processed_outputs |= ((uint64_t)1 << i); - num_outputs++; - - info->output_semantic_name[i] = semantic_name; - info->output_semantic_index[i] = semantic_index; - - switch (semantic_name) { - case TGSI_SEMANTIC_PRIMID: - info->writes_primid = true; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - info->writes_viewport_index = true; - break; - case TGSI_SEMANTIC_LAYER: - info->writes_layer = true; - break; - case TGSI_SEMANTIC_PSIZE: - info->writes_psize = true; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - info->writes_clipvertex = true; - break; - case TGSI_SEMANTIC_COLOR: - info->colors_written |= 1 << semantic_index; - break; - case TGSI_SEMANTIC_STENCIL: - info->writes_stencil = true; - break; - case TGSI_SEMANTIC_SAMPLEMASK: - info->writes_samplemask = true; - break; - case TGSI_SEMANTIC_EDGEFLAG: - info->writes_edgeflag = true; - break; - case TGSI_SEMANTIC_POSITION: - if (info->processor == PIPE_SHADER_FRAGMENT) - info->writes_z = true; - else - info->writes_position = true; - break; - } - } - - unsigned loc = variable->data.location; - if (nir->info.stage == MESA_SHADER_FRAGMENT && - loc == FRAG_RESULT_COLOR && - nir->info.outputs_written & (1ull << loc)) { - assert(attrib_count == 1); - info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true; - } - } - - info->num_outputs = num_outputs; - - struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - struct set *ssbo_set = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - /* Intialise const_file_max[0] */ - info->const_file_max[0] = -1; - - /* The first 8 are reserved for atomic counters using ssbo */ - unsigned ssbo_idx = 8; - - unsigned ubo_idx = 1; - nir_foreach_variable(variable, &nir->uniforms) { - const struct 
glsl_type *type = variable->type; - enum glsl_base_type base_type = - glsl_get_base_type(glsl_without_array(type)); - unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); - unsigned loc = variable->data.driver_location / 4; - int slot_count = glsl_count_attribute_slots(type, false); - int max_slot = MAX2(info->const_file_max[0], (int) loc) + slot_count; - - /* Gather buffers declared bitmasks. Note: radeonsi doesn't - * really use the mask (other than ubo_idx == 1 for regular - * uniforms) its really only used for getting the buffer count - * so we don't need to worry about the ordering. - */ - if (variable->interface_type != NULL) { - if (variable->data.mode == nir_var_uniform || - variable->data.mode == nir_var_mem_ubo || - variable->data.mode == nir_var_mem_ssbo) { - - struct set *buf_set = variable->data.mode == nir_var_mem_ssbo ? - ssbo_set : ubo_set; - - unsigned block_count; - if (base_type != GLSL_TYPE_INTERFACE) { - struct set_entry *entry = - _mesa_set_search(buf_set, variable->interface_type); - - /* Check if we have already processed - * a member from this ubo. - */ - if (entry) - continue; - - block_count = 1; - } else { - block_count = aoa_size; - } - - if (variable->data.mode == nir_var_uniform || - variable->data.mode == nir_var_mem_ubo) { - info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count); - ubo_idx += block_count; - } else { - assert(variable->data.mode == nir_var_mem_ssbo); - - info->shader_buffers_declared |= u_bit_consecutive(ssbo_idx, block_count); - ssbo_idx += block_count; - } - - _mesa_set_add(buf_set, variable->interface_type); - } - - continue; - } - - /* We rely on the fact that nir_lower_samplers_as_deref has - * eliminated struct dereferences. - */ - if (base_type == GLSL_TYPE_SAMPLER && !variable->data.bindless) { - info->samplers_declared |= - u_bit_consecutive(variable->data.binding, aoa_size); - } else if (base_type == GLSL_TYPE_IMAGE && !variable->data.bindless) { - info->images_declared |= - u_bit_consecutive(variable->data.binding, aoa_size); - } else if (base_type != GLSL_TYPE_ATOMIC_UINT) { - info->const_buffers_declared |= 1; - info->const_file_max[0] = max_slot; - } - } - - _mesa_set_destroy(ubo_set, NULL); - _mesa_set_destroy(ssbo_set, NULL); - - info->num_written_clipdistance = nir->info.clip_distance_array_size; - info->num_written_culldistance = nir->info.cull_distance_array_size; - info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance); - info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance); - - if (info->processor == PIPE_SHADER_FRAGMENT) - info->uses_kill = nir->info.fs.uses_discard; - - func = (struct nir_function *)exec_list_get_head_const(&nir->functions); - nir_foreach_block(block, func->impl) { - nir_foreach_instr(instr, block) - scan_instruction(nir, info, instr); - } + bool progress; + + NIR_PASS_V(nir, nir_lower_vars_to_ssa); + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + + do { + progress = false; + bool lower_alu_to_scalar = false; + bool lower_phis_to_scalar = false; + + if (first) { + bool opt_find_array_copies = false; + + NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp); + NIR_PASS(lower_alu_to_scalar, nir, nir_shrink_vec_array_vars, nir_var_function_temp); + NIR_PASS(opt_find_array_copies, nir, nir_opt_find_array_copies); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + + /* Call nir_lower_var_copies() to remove any copies introduced + * by nir_opt_find_array_copies(). 
+ */ + if (opt_find_array_copies) + NIR_PASS(progress, nir, nir_lower_var_copies); + progress |= opt_find_array_copies; + } else { + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + } + + NIR_PASS(progress, nir, nir_opt_dead_write_vars); + + NIR_PASS(lower_alu_to_scalar, nir, nir_opt_trivial_continues); + /* (Constant) copy propagation is needed for txf with offsets. */ + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(lower_phis_to_scalar, nir, nir_opt_if, true); + NIR_PASS(progress, nir, nir_opt_dead_cf); + + if (lower_alu_to_scalar) + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); + if (lower_phis_to_scalar) + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + progress |= lower_alu_to_scalar | lower_phis_to_scalar; + + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); + + /* Needed for algebraic lowering */ + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + + if (!nir->info.flrp_lowered) { + unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | + (nir->options->lower_flrp32 ? 32 : 0) | + (nir->options->lower_flrp64 ? 64 : 0); + assert(lower_flrp); + bool lower_flrp_progress = false; + + NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, false /* always_precise */); + if (lower_flrp_progress) { + NIR_PASS(progress, nir, nir_opt_constant_folding); + progress = true; + } + + /* Nothing should rematerialize any flrps, so we only + * need to do this lowering once. + */ + nir->info.flrp_lowered = true; + } + + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_conditional_discard); + if (nir->options->max_unroll_iterations) { + NIR_PASS(progress, nir, nir_opt_loop_unroll, 0); + } + } while (progress); } -void -si_nir_opts(struct nir_shader *nir) +static int type_size_vec4(const struct glsl_type *type, bool bindless) { - bool progress; - unsigned lower_flrp = - (nir->options->lower_flrp16 ? 16 : 0) | - (nir->options->lower_flrp32 ? 32 : 0) | - (nir->options->lower_flrp64 ? 64 : 0); - - do { - progress = false; - - NIR_PASS_V(nir, nir_lower_vars_to_ssa); - - NIR_PASS(progress, nir, nir_opt_copy_prop_vars); - NIR_PASS(progress, nir, nir_opt_dead_write_vars); - - NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); - - /* (Constant) copy propagation is needed for txf with offsets. */ - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_remove_phis); - NIR_PASS(progress, nir, nir_opt_dce); - if (nir_opt_trivial_continues(nir)) { - progress = true; - NIR_PASS(progress, nir, nir_copy_prop); - NIR_PASS(progress, nir, nir_opt_dce); - } - NIR_PASS(progress, nir, nir_opt_if, true); - NIR_PASS(progress, nir, nir_opt_dead_cf); - NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); - - /* Needed for algebraic lowering */ - NIR_PASS(progress, nir, nir_opt_algebraic); - NIR_PASS(progress, nir, nir_opt_constant_folding); - - if (lower_flrp != 0) { - bool lower_flrp_progress = false; - - NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, - lower_flrp, - false /* always_precise */, - nir->options->lower_ffma); - if (lower_flrp_progress) { - NIR_PASS(progress, nir, - nir_opt_constant_folding); - progress = true; - } - - /* Nothing should rematerialize any flrps, so we only - * need to do this lowering once. 
- */ - lower_flrp = 0; - } - - NIR_PASS(progress, nir, nir_opt_undef); - NIR_PASS(progress, nir, nir_opt_conditional_discard); - if (nir->options->max_unroll_iterations) { - NIR_PASS(progress, nir, nir_opt_loop_unroll, 0); - } - } while (progress); + return glsl_count_attribute_slots(type, false); } -static int -type_size_vec4(const struct glsl_type *type, bool bindless) +static void si_nir_lower_color(nir_shader *nir) { - return glsl_count_attribute_slots(type, false); + nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + + nir_builder b; + nir_builder_init(&b, entrypoint); + + nir_foreach_block (block, entrypoint) { + nir_foreach_instr_safe (instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_load_deref) + continue; + + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (deref->mode != nir_var_shader_in) + continue; + + b.cursor = nir_before_instr(instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + nir_ssa_def *def; + + if (var->data.location == VARYING_SLOT_COL0) { + def = nir_load_color0(&b); + nir->info.fs.color0_interp = var->data.interpolation; + nir->info.fs.color0_sample = var->data.sample; + nir->info.fs.color0_centroid = var->data.centroid; + } else if (var->data.location == VARYING_SLOT_COL1) { + def = nir_load_color1(&b); + nir->info.fs.color1_interp = var->data.interpolation; + nir->info.fs.color1_sample = var->data.sample; + nir->info.fs.color1_centroid = var->data.centroid; + } else { + continue; + } + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def)); + nir_instr_remove(instr); + } + } } -static void -si_nir_lower_color(nir_shader *nir) +static void si_lower_io(struct nir_shader *nir) { - nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); - - nir_builder b; - nir_builder_init(&b, entrypoint); - - nir_foreach_block(block, entrypoint) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = - nir_instr_as_intrinsic(instr); - - if (intrin->intrinsic != nir_intrinsic_load_deref) - continue; - - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - if (deref->mode != nir_var_shader_in) - continue; - - b.cursor = nir_before_instr(instr); - nir_variable *var = nir_deref_instr_get_variable(deref); - nir_ssa_def *def; - - if (var->data.location == VARYING_SLOT_COL0) { - def = nir_load_color0(&b); - } else if (var->data.location == VARYING_SLOT_COL1) { - def = nir_load_color1(&b); - } else { - continue; - } - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def)); - nir_instr_remove(instr); - } - } + /* HW supports indirect indexing for: | Enabled in driver + * ------------------------------------------------------- + * VS inputs | No + * TCS inputs | Yes + * TES inputs | Yes + * GS inputs | No + * ------------------------------------------------------- + * VS outputs before TCS | No + * VS outputs before GS | No + * TCS outputs | Yes + * TES outputs before GS | No + */ + bool has_indirect_inputs = nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL; + bool has_indirect_outputs = nir->info.stage == MESA_SHADER_TESS_CTRL; + + if (!has_indirect_inputs || !has_indirect_outputs) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), + !has_indirect_outputs, !has_indirect_inputs); + + /* Since we're doing 
nir_lower_io_to_temporaries late, we need + * to lower all the copy_deref's introduced by + * lower_io_to_temporaries before calling nir_lower_io. + */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_global_vars_to_local); + } + + if (nir->info.stage == MESA_SHADER_FRAGMENT) + si_nir_lower_color(nir); + + NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in, + type_size_vec4, 0); + nir->info.io_lowered = true; + + /* This pass needs actual constants */ + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in); + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_out); + + /* Remove dead derefs, so that nir_validate doesn't fail. */ + NIR_PASS_V(nir, nir_opt_dce); + + /* Remove input and output nir_variables, because we don't need them + * anymore. Also remove uniforms, because those should have been lowered + * to UBOs already. + */ + unsigned modes = nir_var_shader_in | nir_var_shader_out | nir_var_uniform; + nir_foreach_variable_with_modes_safe(var, nir, modes) { + if (var->data.mode == nir_var_uniform && + (glsl_type_get_image_count(var->type) || + glsl_type_get_sampler_count(var->type))) + continue; + + exec_node_remove(&var->node); + } } /** * Perform "lowering" operations on the NIR that are run once when the shader * selector is created. */ -void -si_lower_nir(struct si_shader_selector* sel, unsigned wave_size) -{ - /* Adjust the driver location of inputs and outputs. The state tracker - * interprets them as slots, while the ac/nir backend interprets them - * as individual components. - */ - if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) { - nir_foreach_variable(variable, &sel->nir->inputs) - variable->data.driver_location *= 4; - } else { - NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(sel->nir), false, true); - - /* Since we're doing nir_lower_io_to_temporaries late, we need - * to lower all the copy_deref's introduced by - * lower_io_to_temporaries before calling nir_lower_io. - */ - NIR_PASS_V(sel->nir, nir_split_var_copies); - NIR_PASS_V(sel->nir, nir_lower_var_copies); - NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local); - - si_nir_lower_color(sel->nir); - NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0); - - /* This pass needs actual constants */ - NIR_PASS_V(sel->nir, nir_opt_constant_folding); - NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, - nir_var_shader_in); - } - - nir_foreach_variable(variable, &sel->nir->outputs) { - variable->data.driver_location *= 4; - - if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { - if (variable->data.location == FRAG_RESULT_DEPTH) - variable->data.driver_location += 2; - else if (variable->data.location == FRAG_RESULT_STENCIL) - variable->data.driver_location += 1; - } - } - - /* Perform lowerings (and optimizations) of code. 
- * - * Performance considerations aside, we must: - * - lower certain ALU operations - * - ensure constant offsets for texture instructions are folded - * and copy-propagated - */ - - static const struct nir_lower_tex_options lower_tex_options = { - .lower_txp = ~0u, - }; - NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); - - const nir_lower_subgroups_options subgroups_options = { - .subgroup_size = wave_size, - .ballot_bit_size = wave_size, - .lower_to_scalar = true, - .lower_subgroup_masks = true, - .lower_vote_trivial = false, - .lower_vote_eq_to_ballot = true, - }; - NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options); - - ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); - - si_nir_opts(sel->nir); - - NIR_PASS_V(sel->nir, nir_lower_bool_to_int32); - - /* Strip the resulting shader so that the shader cache is more likely - * to hit from other similar shaders. - */ - nir_strip(sel->nir); -} - -static void declare_nir_input_vs(struct si_shader_context *ctx, - struct nir_variable *variable, - unsigned input_index, - LLVMValueRef out[4]) +static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) { - si_llvm_load_input_vs(ctx, input_index, out); + /* Perform lowerings (and optimizations) of code. + * + * Performance considerations aside, we must: + * - lower certain ALU operations + * - ensure constant offsets for texture instructions are folded + * and copy-propagated + */ + + static const struct nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + }; + NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); + + const nir_lower_subgroups_options subgroups_options = { + .subgroup_size = 64, + .ballot_bit_size = 64, + .lower_to_scalar = true, + .lower_subgroup_masks = true, + .lower_vote_trivial = false, + .lower_vote_eq_to_ballot = true, + }; + NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options); + + /* Lower load constants to scalar and then clean up the mess */ + NIR_PASS_V(nir, nir_lower_load_const_to_scalar); + NIR_PASS_V(nir, nir_lower_var_copies); + NIR_PASS_V(nir, nir_lower_pack); + NIR_PASS_V(nir, nir_opt_access); + si_nir_opts(nir, true); + + /* Lower large variables that are always constant with load_constant + * intrinsics, which get turned into PC-relative loads from a data + * section next to the shader. + * + * st/mesa calls finalize_nir twice, but we can't call this pass twice. + */ + bool changed = false; + if (!nir->constant_data) { + /* The pass crashes if there are dead temps of lowered IO interface types. 
*/ + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS(changed, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16); + } + + changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class); + if (changed) + si_nir_opts(nir, false); + + NIR_PASS_V(nir, nir_lower_bool_to_int32); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + + if (sscreen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) + NIR_PASS_V(nir, nir_lower_discard_to_demote); } -LLVMValueRef -si_nir_lookup_interp_param(struct ac_shader_abi *abi, - enum glsl_interp_mode interp, unsigned location) +void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize) { - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - int interp_param_idx = -1; - - switch (interp) { - case INTERP_MODE_FLAT: - return NULL; - case INTERP_MODE_SMOOTH: - case INTERP_MODE_NONE: - if (location == INTERP_CENTER) - interp_param_idx = SI_PARAM_PERSP_CENTER; - else if (location == INTERP_CENTROID) - interp_param_idx = SI_PARAM_PERSP_CENTROID; - else if (location == INTERP_SAMPLE) - interp_param_idx = SI_PARAM_PERSP_SAMPLE; - break; - case INTERP_MODE_NOPERSPECTIVE: - if (location == INTERP_CENTER) - interp_param_idx = SI_PARAM_LINEAR_CENTER; - else if (location == INTERP_CENTROID) - interp_param_idx = SI_PARAM_LINEAR_CENTROID; - else if (location == INTERP_SAMPLE) - interp_param_idx = SI_PARAM_LINEAR_SAMPLE; - break; - default: - assert(!"Unhandled interpolation mode."); - return NULL; - } - - return interp_param_idx != -1 ? - LLVMGetParam(ctx->main_fn, interp_param_idx) : NULL; -} - -static LLVMValueRef -si_nir_load_sampler_desc(struct ac_shader_abi *abi, - unsigned descriptor_set, unsigned base_index, - unsigned constant_index, LLVMValueRef dynamic_index, - enum ac_descriptor_type desc_type, bool image, - bool write, bool bindless) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned const_index = base_index + constant_index; - - assert(!descriptor_set); - assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER); - - if (bindless) { - LLVMValueRef list = - LLVMGetParam(ctx->main_fn, ctx->param_bindless_samplers_and_images); - - /* dynamic_index is the bindless handle */ - if (image) { - /* For simplicity, bindless image descriptors use fixed - * 16-dword slots for now. - */ - dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->i64, 2, 0), ""); - - return si_load_image_desc(ctx, list, dynamic_index, desc_type, - write, true); - } - - /* Since bindless handle arithmetic can contain an unsigned integer - * wraparound and si_load_sampler_desc assumes there isn't any, - * use GEP without "inbounds" (inside ac_build_pointer_add) - * to prevent incorrect code generation and hangs. - */ - dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->i64, 2, 0), ""); - list = ac_build_pointer_add(&ctx->ac, list, dynamic_index); - return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type); - } - - unsigned num_slots = image ? 
ctx->num_images : ctx->num_samplers; - assert(const_index < num_slots); - - LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); - LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false); - - if (dynamic_index) { - index = LLVMBuildAdd(builder, index, dynamic_index, ""); - - /* From the GL_ARB_shader_image_load_store extension spec: - * - * If a shader performs an image load, store, or atomic - * operation using an image variable declared as an array, - * and if the index used to select an individual element is - * negative or greater than or equal to the size of the - * array, the results of the operation are undefined but may - * not lead to termination. - */ - index = si_llvm_bound_index(ctx, index, num_slots); - } - - if (image) { - index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0), - index, ""); - return si_load_image_desc(ctx, list, index, desc_type, write, false); - } - - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), ""); - return si_load_sampler_desc(ctx, list, index, desc_type); -} + struct si_screen *sscreen = (struct si_screen *)screen; + struct nir_shader *nir = (struct nir_shader *)nirptr; -static void bitcast_inputs(struct si_shader_context *ctx, - LLVMValueRef data[4], - unsigned input_idx) -{ - for (unsigned chan = 0; chan < 4; chan++) { - ctx->inputs[input_idx + chan] = - LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, ""); - } -} - -bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) -{ - struct tgsi_shader_info *info = &ctx->shader->selector->info; - - if (nir->info.stage == MESA_SHADER_VERTEX) { - uint64_t processed_inputs = 0; - nir_foreach_variable(variable, &nir->inputs) { - unsigned attrib_count = glsl_count_attribute_slots(variable->type, - true); - unsigned input_idx = variable->data.driver_location; - - LLVMValueRef data[4]; - unsigned loc = variable->data.location; - - for (unsigned i = 0; i < attrib_count; i++) { - /* Packed components share the same location so skip - * them if we have already processed the location. - */ - if (processed_inputs & ((uint64_t)1 << (loc + i))) { - input_idx += 4; - continue; - } - - declare_nir_input_vs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); - if (glsl_type_is_dual_slot(variable->type)) { - input_idx += 4; - declare_nir_input_vs(ctx, variable, input_idx / 4, data); - bitcast_inputs(ctx, data, input_idx); - } - - processed_inputs |= ((uint64_t)1 << (loc + i)); - input_idx += 4; - } - } - } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { - unsigned colors_read = - ctx->shader->selector->info.colors_read; - LLVMValueRef main_fn = ctx->main_fn; - - LLVMValueRef undef = LLVMGetUndef(ctx->f32); - - unsigned offset = SI_PARAM_POS_FIXED_PT + 1; - - if (colors_read & 0x0f) { - unsigned mask = colors_read & 0x0f; - LLVMValueRef values[4]; - values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; - values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; - values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; - values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; - ctx->abi.color0 = - ac_to_integer(&ctx->ac, - ac_build_gather_values(&ctx->ac, values, 4)); - } - if (colors_read & 0xf0) { - unsigned mask = (colors_read & 0xf0) >> 4; - LLVMValueRef values[4]; - values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; - values[1] = mask & 0x2 ? 
LLVMGetParam(main_fn, offset++) : undef; - values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; - values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef; - ctx->abi.color1 = - ac_to_integer(&ctx->ac, - ac_build_gather_values(&ctx->ac, values, 4)); - } - - ctx->abi.interp_at_sample_force_center = - ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center; - } - - ctx->abi.inputs = &ctx->inputs[0]; - ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; - ctx->abi.clamp_shadow_reference = true; - ctx->abi.robust_buffer_access = true; - - ctx->num_samplers = util_last_bit(info->samplers_declared); - ctx->num_images = util_last_bit(info->images_declared); - - if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) { - assert(gl_shader_stage_is_compute(nir->info.stage)); - si_declare_compute_memory(ctx); - } - ac_nir_translate(&ctx->ac, &ctx->abi, nir); - - return true; + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + si_lower_io(nir); + si_lower_nir(sscreen, nir); }
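
Notes on the new mask handling in scan_io_usage(): a 64-bit component
occupies two 32-bit slots, so bit i of the 4-bit read/write mask widens to
bits 2i and 2i+1, and a "dual slot" value (64-bit with >= 3 components)
spreads the widened mask across two consecutive driver locations; two
16-bit components share one 32-bit slot, so bit i collapses to bit i/2.
The following standalone sketch (helper names are illustrative, not part
of the patch) reproduces that arithmetic:

#include <assert.h>
#include <stdbool.h>

/* Widen a 4-bit 64-bit-component mask to a 32-bit-component mask. */
static unsigned mask64_to_mask32(unsigned mask)
{
   unsigned new_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (mask & (1u << i))
         new_mask |= 0x3u << (2 * i); /* one 64-bit comp -> two dwords */
   }
   return new_mask;
}

/* Narrow a 16-bit-component mask: two 16-bit comps share one dword. */
static unsigned mask16_to_mask32(unsigned mask)
{
   unsigned new_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (mask & (1u << i))
         new_mask |= 0x1u << (i / 2);
   }
   return new_mask;
}

int main(void)
{
   /* dvec4 read as .xz -> dwords 0,1 and 4,5. */
   assert(mask64_to_mask32(0x5) == 0x33);

   /* 16-bit vec4 read as .zw -> dword 1 only. */
   assert(mask16_to_mask32(0xc) == 0x2);

   /* Dual-slot split for a fully read dvec3 (mask 0x7 widens to 0x3f):
    * slot 0 gets 0x3f & 0xf = 0xf, slot 1 gets (0x3f >> 4) & 0xf = 0x3,
    * matching "dual_slot && i % 2 ? mask >> 4 : mask" in the patch. */
   bool dual_slot = true;
   unsigned wide = mask64_to_mask32(0x7);
   unsigned slot0 = (dual_slot && 0 % 2 ? wide >> 4 : wide) & 0xf;
   unsigned slot1 = (dual_slot && 1 % 2 ? wide >> 4 : wide) & 0xf;
   assert(slot0 == 0xf && slot1 == 0x3);
   return 0;
}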
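
The geometry-shader path packs one 2-bit vertex-stream index per output
component: io_semantics.gs_streams arrives pre-shifted by the store's
start component, and only components not already counted in
output_usagemask[] (the patch's "new_mask") bump
num_stream_output_components[], so a component written twice is counted
once. A minimal sketch of the packing itself (the set/get helpers are
mine, for illustration only):

#include <assert.h>

static unsigned set_component_stream(unsigned packed, unsigned comp,
                                     unsigned stream)
{
   return packed | ((stream & 0x3u) << (comp * 2));
}

static unsigned get_component_stream(unsigned packed, unsigned comp)
{
   return (packed >> (comp * 2)) & 0x3u;
}

int main(void)
{
   unsigned streams = 0;
   streams = set_component_stream(streams, 0, 0); /* .x -> stream 0 */
   streams = set_component_stream(streams, 1, 2); /* .y -> stream 2 */
   streams = set_component_stream(streams, 3, 1); /* .w -> stream 1 */
   assert(get_component_stream(streams, 1) == 2);
   assert(get_component_stream(streams, 2) == 0); /* unwritten -> 0 */
   return 0;
}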
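
Two smaller points about the bookkeeping in si_nir_scan_shader():
output_semantic_to_slot[] is the inverse of output_semantic[] and is
initialized with memset(..., -1, ...), which yields -1 in every entry
because the all-ones byte pattern reads back as -1 for any signed integer
width; and the final "info->output_readmask[i] &= info->output_usagemask[i]"
trims output loads down to components that were actually stored, so later
consumers never see a read of a component no store produced.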