X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_nir.c;h=46dd6e973fb2ac0b83935ff1124642b36d18b4d0;hb=64349a60e17a03de4bb7e03d942bfc1679dfe8ab;hp=d9e3ac41868d6f276a0f3cdd3b02faf371f203c8;hpb=f4d0565f5261d49e675c55183f77269a736c3e2b;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index d9e3ac41868..46dd6e973fb 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -48,14 +48,14 @@ static const nir_deref_instr *tex_get_texture_deref(nir_tex_instr *instr) static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr, bool is_input) { - unsigned interp = TGSI_INTERPOLATE_CONSTANT; /* load_input uses flat shading */ + unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */ if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr); if (baryc) { if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0) - interp = tgsi_get_interp_mode(nir_intrinsic_interp_mode(baryc), false); + interp = nir_intrinsic_interp_mode(baryc); else unreachable("unknown barycentric intrinsic"); } else { @@ -64,16 +64,18 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr } unsigned mask, bit_size; - bool dual_slot; + bool dual_slot, is_output_load; if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) { mask = nir_intrinsic_write_mask(intr); /* store */ bit_size = nir_src_bit_size(intr->src[0]); dual_slot = bit_size == 64 && nir_src_num_components(intr->src[0]) >= 3; + is_output_load = false; } else { mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */ bit_size = intr->dest.ssa.bit_size; dual_slot = bit_size == 64 && intr->dest.ssa.num_components >= 3; + is_output_load = !is_input; } /* Convert the 64-bit component mask to a 32-bit component mask. */ @@ -98,27 +100,25 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr mask <<= nir_intrinsic_component(intr); - unsigned name, index; - if (info->processor == PIPE_SHADER_VERTEX && is_input) { - /* VS doesn't have semantics. */ - name = 0; - index = 0; - } else if (info->processor == PIPE_SHADER_FRAGMENT && !is_input) { - tgsi_get_gl_frag_result_semantic(nir_intrinsic_io_semantics(intr).location, - &name, &index); - /* Adjust for dual source blending. */ - if (nir_intrinsic_io_semantics(intr).dual_source_blend_index) - index++; - } else { - tgsi_get_gl_varying_semantic(nir_intrinsic_io_semantics(intr).location, - true, &name, &index); - } - nir_src offset = *nir_get_io_offset_src(intr); bool indirect = !nir_src_is_const(offset); if (!indirect) assert(nir_src_as_uint(offset) == 0); + unsigned semantic = 0; + /* VS doesn't have semantics. */ + if (info->stage != MESA_SHADER_VERTEX || !is_input) + semantic = nir_intrinsic_io_semantics(intr).location; + + if (info->stage == MESA_SHADER_FRAGMENT && !is_input) { + /* Never use FRAG_RESULT_COLOR directly. */ + if (semantic == FRAG_RESULT_COLOR) { + semantic = FRAG_RESULT_DATA0; + info->color0_writes_all_cbufs = true; + } + semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; + } + unsigned driver_location = nir_intrinsic_base(intr); unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : (1 + dual_slot); @@ -129,31 +129,39 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr unsigned loc = driver_location + i; unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf; - info->input_semantic_name[loc] = name; - info->input_semantic_index[loc] = index + i; + info->input_semantic[loc] = semantic + i; info->input_interpolate[loc] = interp; if (slot_mask) { info->input_usage_mask[loc] |= slot_mask; info->num_inputs = MAX2(info->num_inputs, loc + 1); - if (name == TGSI_SEMANTIC_PRIMID) + if (semantic == VARYING_SLOT_PRIMITIVE_ID) info->uses_primid = true; } } } else { /* Outputs. */ assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); + assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot)); for (unsigned i = 0; i < num_slots; i++) { unsigned loc = driver_location + i; unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf; - info->output_semantic_name[loc] = name; - info->output_semantic_index[loc] = index + i; + info->output_semantic[loc] = semantic + i; + info->output_semantic_to_slot[semantic + i] = loc; - if (slot_mask) { - if (info->processor == PIPE_SHADER_GEOMETRY) { + if (is_output_load) { + /* Output loads have only a few things that we need to track. */ + info->output_readmask[loc] |= slot_mask; + + if (info->stage == MESA_SHADER_FRAGMENT && + nir_intrinsic_io_semantics(intr).fb_fetch_output) + info->uses_fbfetch = true; + } else if (slot_mask) { + /* Output stores. */ + if (info->stage == MESA_SHADER_GEOMETRY) { unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams << (nir_intrinsic_component(intr) * 2); unsigned new_mask = slot_mask & ~info->output_usagemask[loc]; @@ -171,44 +179,48 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr info->output_usagemask[loc] |= slot_mask; info->num_outputs = MAX2(info->num_outputs, loc + 1); - switch (name) { - case TGSI_SEMANTIC_PRIMID: - info->writes_primid = true; - break; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - info->writes_viewport_index = true; - break; - case TGSI_SEMANTIC_LAYER: - info->writes_layer = true; - break; - case TGSI_SEMANTIC_PSIZE: - info->writes_psize = true; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - info->writes_clipvertex = true; - break; - case TGSI_SEMANTIC_COLOR: - info->colors_written |= 1 << (index + i); - - if (info->processor == PIPE_SHADER_FRAGMENT && - nir_intrinsic_io_semantics(intr).location == FRAG_RESULT_COLOR) - info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true; - break; - case TGSI_SEMANTIC_STENCIL: - info->writes_stencil = true; - break; - case TGSI_SEMANTIC_SAMPLEMASK: - info->writes_samplemask = true; - break; - case TGSI_SEMANTIC_EDGEFLAG: - info->writes_edgeflag = true; - break; - case TGSI_SEMANTIC_POSITION: - if (info->processor == PIPE_SHADER_FRAGMENT) + if (info->stage == MESA_SHADER_FRAGMENT) { + switch (semantic) { + case FRAG_RESULT_DEPTH: info->writes_z = true; - else + break; + case FRAG_RESULT_STENCIL: + info->writes_stencil = true; + break; + case FRAG_RESULT_SAMPLE_MASK: + info->writes_samplemask = true; + break; + default: + if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) { + unsigned index = semantic - FRAG_RESULT_DATA0; + info->colors_written |= 1 << (index + i); + } + break; + } + } else { + switch (semantic) { + case VARYING_SLOT_PRIMITIVE_ID: + info->writes_primid = true; + break; + case VARYING_SLOT_VIEWPORT: + info->writes_viewport_index = true; + break; + case VARYING_SLOT_LAYER: + info->writes_layer = true; + break; + case VARYING_SLOT_PSIZ: + info->writes_psize = true; + break; + case VARYING_SLOT_CLIP_VERTEX: + info->writes_clipvertex = true; + break; + case VARYING_SLOT_EDGE: + info->writes_edgeflag = true; + break; + case VARYING_SLOT_POS: info->writes_position = true; - break; + break; + } } } } @@ -218,42 +230,15 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr static void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info, nir_instr *instr) { - if (instr->type == nir_instr_type_alu) { - nir_alu_instr *alu = nir_instr_as_alu(instr); - - switch (alu->op) { - case nir_op_fddx: - case nir_op_fddy: - case nir_op_fddx_fine: - case nir_op_fddy_fine: - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - info->uses_derivatives = true; - break; - default: - break; - } - } else if (instr->type == nir_instr_type_tex) { + if (instr->type == nir_instr_type_tex) { nir_tex_instr *tex = nir_instr_as_tex(instr); const nir_deref_instr *deref = tex_get_texture_deref(tex); nir_variable *var = deref ? nir_deref_instr_get_variable(deref) : NULL; - if (!var) { - info->samplers_declared |= u_bit_consecutive(tex->sampler_index, 1); - } else { + if (var) { if (deref->mode != nir_var_uniform || var->data.bindless) info->uses_bindless_samplers = true; } - - switch (tex->op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_lod: - info->uses_derivatives = true; - break; - default: - break; - } } else if (instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); @@ -277,7 +262,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info break; case nir_intrinsic_load_local_group_size: /* The block size is translated to IMM with a fixed block size. */ - if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0) + if (info->base.cs.local_size[0] == 0) info->uses_block_size = true; break; case nir_intrinsic_load_local_invocation_id: @@ -293,15 +278,6 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info } break; } - case nir_intrinsic_load_vertex_id: - info->uses_vertexid = 1; - break; - case nir_intrinsic_load_vertex_id_zero_base: - info->uses_vertexid_nobase = 1; - break; - case nir_intrinsic_load_base_vertex: - info->uses_basevertex = 1; - break; case nir_intrinsic_load_draw_id: info->uses_drawid = 1; break; @@ -322,12 +298,10 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info break; case nir_intrinsic_bindless_image_store: info->uses_bindless_images = true; - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ + info->num_memory_stores++; break; case nir_intrinsic_image_deref_store: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ + info->num_memory_stores++; break; case nir_intrinsic_bindless_image_atomic_add: case nir_intrinsic_bindless_image_atomic_imin: @@ -340,8 +314,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info case nir_intrinsic_bindless_image_atomic_exchange: case nir_intrinsic_bindless_image_atomic_comp_swap: info->uses_bindless_images = true; - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ + info->num_memory_stores++; break; case nir_intrinsic_image_deref_atomic_add: case nir_intrinsic_image_deref_atomic_imin: @@ -355,8 +328,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_atomic_inc_wrap: case nir_intrinsic_image_deref_atomic_dec_wrap: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ + info->num_memory_stores++; break; case nir_intrinsic_store_ssbo: case nir_intrinsic_ssbo_atomic_add: @@ -369,8 +341,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: - info->writes_memory = true; - info->num_memory_instructions++; /* we only care about stores */ + info->num_memory_stores++; break; case nir_intrinsic_load_color0: case nir_intrinsic_load_color1: { @@ -396,9 +367,6 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info info->uses_linear_centroid = true; else info->uses_linear_center = true; - - if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) - info->uses_linear_opcode_interp_sample = true; } else { if (intr->intrinsic == nir_intrinsic_load_barycentric_sample) info->uses_persp_sample = true; @@ -406,10 +374,9 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info info->uses_persp_centroid = true; else info->uses_persp_center = true; - - if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) - info->uses_persp_opcode_interp_sample = true; } + if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) + info->uses_interp_at_sample = true; break; } case nir_intrinsic_load_input: @@ -418,28 +385,12 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info case nir_intrinsic_load_interpolated_input: scan_io_usage(info, intr, true); break; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: scan_io_usage(info, intr, false); break; - case nir_intrinsic_load_output: { - unsigned location = nir_intrinsic_io_semantics(intr).location; - - if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - if (location == VARYING_SLOT_TESS_LEVEL_INNER || - location == VARYING_SLOT_TESS_LEVEL_OUTER) - info->reads_tessfactor_outputs = true; - else - info->reads_perpatch_outputs = true; - } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { - if (nir_intrinsic_io_semantics(intr).fb_fetch_output) - info->uses_fbfetch = true; - } - break; - } - case nir_intrinsic_load_per_vertex_output: - info->reads_pervertex_outputs = true; - break; case nir_intrinsic_load_deref: case nir_intrinsic_store_deref: case nir_intrinsic_interp_deref_at_centroid: @@ -457,71 +408,25 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf { nir_function *func; - info->processor = pipe_shader_type_from_mesa(nir->info.stage); - - info->properties[TGSI_PROPERTY_NEXT_SHADER] = pipe_shader_type_from_mesa(nir->info.next_stage); - - if (nir->info.stage == MESA_SHADER_VERTEX) { - info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] = nir->info.vs.window_space_position; - info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] = nir->info.vs.blit_sgprs_amd; - } - - if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] = nir->info.tess.tcs_vertices_out; - } + info->base = nir->info; + info->stage = nir->info.stage; if (nir->info.stage == MESA_SHADER_TESS_EVAL) { - if (nir->info.tess.primitive_mode == GL_ISOLINES) - info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES; - else - info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode; - - STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL); - STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 == PIPE_TESS_SPACING_FRACTIONAL_ODD); - STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 == PIPE_TESS_SPACING_FRACTIONAL_EVEN); - - info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3; - info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw; - info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode; - } - - if (nir->info.stage == MESA_SHADER_GEOMETRY) { - info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive; - info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive; - info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out; - info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations; + if (info->base.tess.primitive_mode == GL_ISOLINES) + info->base.tess.primitive_mode = GL_LINES; } if (nir->info.stage == MESA_SHADER_FRAGMENT) { - info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] = - nir->info.fs.early_fragment_tests | nir->info.fs.post_depth_coverage; - info->properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE] = nir->info.fs.post_depth_coverage; + /* post_depth_coverage implies early_fragment_tests */ + info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage; - if (nir->info.fs.pixel_center_integer) { - info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] = TGSI_FS_COORD_PIXEL_CENTER_INTEGER; - } - - if (nir->info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) { - switch (nir->info.fs.depth_layout) { - case FRAG_DEPTH_LAYOUT_ANY: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_ANY; - break; - case FRAG_DEPTH_LAYOUT_GREATER: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_GREATER; - break; - case FRAG_DEPTH_LAYOUT_LESS: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_LESS; - break; - case FRAG_DEPTH_LAYOUT_UNCHANGED: - info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_UNCHANGED; - break; - default: - unreachable("Unknow depth layout"); - } + info->color_interpolate[0] = nir->info.fs.color0_interp; + info->color_interpolate[1] = nir->info.fs.color1_interp; + for (unsigned i = 0; i < 2; i++) { + if (info->color_interpolate[i] == INTERP_MODE_NONE) + info->color_interpolate[i] = INTERP_MODE_COLOR; } - info->color_interpolate[0] = tgsi_get_interp_mode(nir->info.fs.color0_interp, true); - info->color_interpolate[1] = tgsi_get_interp_mode(nir->info.fs.color1_interp, true); info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE : nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID : TGSI_INTERPOLATE_LOC_CENTER; @@ -530,34 +435,14 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf TGSI_INTERPOLATE_LOC_CENTER; } - if (gl_shader_stage_is_compute(nir->info.stage)) { - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; - info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; - info->properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD] = - nir->info.cs.user_data_components_amd; - } - info->constbuf0_num_slots = nir->num_uniforms; - info->shader_buffers_declared = u_bit_consecutive(0, nir->info.num_ssbos); - info->const_buffers_declared = u_bit_consecutive(0, nir->info.num_ubos); - info->images_declared = u_bit_consecutive(0, nir->info.num_images); - info->msaa_images_declared = nir->info.msaa_images; - info->image_buffers = nir->info.image_buffers; - info->samplers_declared = nir->info.textures_used; - - info->num_written_clipdistance = nir->info.clip_distance_array_size; - info->num_written_culldistance = nir->info.cull_distance_array_size; - info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance); - info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance); - - if (info->processor == PIPE_SHADER_FRAGMENT) - info->uses_kill = nir->info.fs.uses_discard; if (nir->info.stage == MESA_SHADER_TESS_CTRL) { info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir); } + memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot)); + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block (block, func->impl) { nir_foreach_instr (instr, block) @@ -568,14 +453,17 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf if (nir->info.stage == MESA_SHADER_FRAGMENT) { for (unsigned i = 0; i < 2; i++) { if ((info->colors_read >> (i * 4)) & 0xf) { - info->input_semantic_name[info->num_inputs] = TGSI_SEMANTIC_COLOR; - info->input_semantic_index[info->num_inputs] = i; + info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i; info->input_interpolate[info->num_inputs] = info->color_interpolate[i]; info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4); info->num_inputs++; } } } + + /* Trim output read masks based on write masks. */ + for (unsigned i = 0; i < info->num_outputs; i++) + info->output_readmask[i] &= info->output_usagemask[i]; } static void si_nir_opts(struct nir_shader *nir, bool first) @@ -639,8 +527,7 @@ static void si_nir_opts(struct nir_shader *nir, bool first) assert(lower_flrp); bool lower_flrp_progress = false; - NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, false /* always_precise */, - nir->options->lower_ffma); + NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp, lower_flrp, false /* always_precise */); if (lower_flrp_progress) { NIR_PASS(progress, nir, nir_opt_constant_folding); progress = true;