X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_nir.c;h=453d31bd135e64e63af57f5d36b367cb5a2fbbd9;hb=e4ca1d64565b4d665bcaf5d08922bfbe1d920e7a;hp=22c7e238456d4b4d52d495e06ad534ba5b3ee06e;hpb=7c27ef182ca8203c0a75e3c450f80ef98383b25b;p=mesa.git

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 22c7e238456..453d31bd135 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -32,6 +32,12 @@
 #include "compiler/nir_types.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
+	return glsl_count_attribute_slots(type, false);
+}
+
 static void scan_instruction(struct tgsi_shader_info *info,
 			     nir_instr *instr)
 {
@@ -53,6 +59,11 @@ static void scan_instruction(struct tgsi_shader_info *info,
 	} else if (instr->type == nir_instr_type_tex) {
 		nir_tex_instr *tex = nir_instr_as_tex(instr);
 
+		if (!tex->texture) {
+			info->samplers_declared |=
+				u_bit_consecutive(tex->sampler_index, 1);
+		}
+
 		switch (tex->op) {
 		case nir_texop_tex:
 		case nir_texop_txb:
@@ -72,6 +83,9 @@ static void scan_instruction(struct tgsi_shader_info *info,
 		case nir_intrinsic_load_instance_id:
 			info->uses_instanceid = 1;
 			break;
+		case nir_intrinsic_load_invocation_id:
+			info->uses_invocationid = true;
+			break;
 		case nir_intrinsic_load_vertex_id:
 			info->uses_vertexid = 1;
 			break;
@@ -84,6 +98,13 @@ static void scan_instruction(struct tgsi_shader_info *info,
 		case nir_intrinsic_load_primitive_id:
 			info->uses_primid = 1;
 			break;
+		case nir_intrinsic_load_sample_mask_in:
+			info->reads_samplemask = true;
+			break;
+		case nir_intrinsic_load_tess_level_inner:
+		case nir_intrinsic_load_tess_level_outer:
+			info->reads_tess_factors = true;
+			break;
 		case nir_intrinsic_image_store:
 		case nir_intrinsic_image_atomic_add:
 		case nir_intrinsic_image_atomic_min:
@@ -112,52 +133,120 @@ static void scan_instruction(struct tgsi_shader_info *info,
 	}
 }
 
+void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
+			   const struct tgsi_shader_info *info,
+			   struct tgsi_tessctrl_info *out)
+{
+	memset(out, 0, sizeof(*out));
+
+	if (nir->info.stage != MESA_SHADER_TESS_CTRL)
+		return;
+
+	/* Initial value = true. Here the pass will accumulate results from
+	 * multiple segments surrounded by barriers. If tess factors aren't
+	 * written at all, it's a shader bug and we don't care if this will be
+	 * true.
+	 */
+	out->tessfactors_are_def_in_all_invocs = true;
+
+	/* TODO: Implement scanning of tess factors, see tgsi backend. */
+}
+
 void si_nir_scan_shader(const struct nir_shader *nir,
 			struct tgsi_shader_info *info)
 {
 	nir_function *func;
 	unsigned i;
 
-	assert(nir->stage == MESA_SHADER_VERTEX ||
-	       nir->stage == MESA_SHADER_FRAGMENT);
+	assert(nir->info.stage == MESA_SHADER_VERTEX ||
+	       nir->info.stage == MESA_SHADER_GEOMETRY ||
+	       nir->info.stage == MESA_SHADER_TESS_CTRL ||
+	       nir->info.stage == MESA_SHADER_TESS_EVAL ||
+	       nir->info.stage == MESA_SHADER_FRAGMENT);
 
-	info->processor = pipe_shader_type_from_mesa(nir->stage);
+	info->processor = pipe_shader_type_from_mesa(nir->info.stage);
 	info->num_tokens = 2; /* indicate that the shader is non-empty */
 	info->num_instructions = 2;
 
-	info->num_inputs = nir->num_inputs;
-	info->num_outputs = nir->num_outputs;
+	if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+		info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
+			nir->info.tess.tcs_vertices_out;
+	}
+
+	if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+		if (nir->info.tess.primitive_mode == GL_ISOLINES)
+			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES;
+		else
+			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode;
+
+		STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
+		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
+			      PIPE_TESS_SPACING_FRACTIONAL_ODD);
+		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
+			      PIPE_TESS_SPACING_FRACTIONAL_EVEN);
+
+		info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3;
+		info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw;
+		info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode;
+	}
+
+	if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+		info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive;
+		info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive;
+		info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out;
+		info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations;
+	}
 
 	i = 0;
+	uint64_t processed_inputs = 0;
+	unsigned num_inputs = 0;
 	nir_foreach_variable(variable, &nir->inputs) {
 		unsigned semantic_name, semantic_index;
 		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
-								   nir->stage == MESA_SHADER_VERTEX);
-
-		assert(attrib_count == 1 && "not implemented");
+								   nir->info.stage == MESA_SHADER_VERTEX);
 
 		/* Vertex shader inputs don't have semantics. The state
 		 * tracker has already mapped them to attributes via
 		 * variable->data.driver_location.
 		 */
-		if (nir->stage == MESA_SHADER_VERTEX)
+		if (nir->info.stage == MESA_SHADER_VERTEX) {
+			if (glsl_type_is_dual_slot(variable->type))
+				num_inputs += 2;
+			else
+				num_inputs++;
 			continue;
+		}
+
+		assert(nir->info.stage != MESA_SHADER_FRAGMENT ||
+		       (attrib_count == 1 && "not implemented"));
 
 		/* Fragment shader position is a system value. */
-		if (nir->stage == MESA_SHADER_FRAGMENT &&
+		if (nir->info.stage == MESA_SHADER_FRAGMENT &&
 		    variable->data.location == VARYING_SLOT_POS) {
 			if (variable->data.pixel_center_integer)
 				info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
 					TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
+
+			num_inputs++;
 			continue;
 		}
 
+		i = variable->data.driver_location;
+		if (processed_inputs & ((uint64_t)1 << i))
+			continue;
+
+		processed_inputs |= ((uint64_t)1 << i);
+		num_inputs++;
+
 		tgsi_get_gl_varying_semantic(variable->data.location, true,
 					     &semantic_name, &semantic_index);
 
 		info->input_semantic_name[i] = semantic_name;
 		info->input_semantic_index[i] = semantic_index;
 
+		if (semantic_name == TGSI_SEMANTIC_PRIMID)
+			info->uses_primid = true;
+
 		if (variable->data.sample)
 			info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
 		else if (variable->data.centroid)
@@ -217,26 +306,78 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 			info->colors_read |= 0x0f;
 		else if (variable->data.location == VARYING_SLOT_COL1)
 			info->colors_read |= 0xf0;
-
-		i++;
 	}
 
+	info->num_inputs = num_inputs;
+
+	i = 0;
+	uint64_t processed_outputs = 0;
+	unsigned num_outputs = 0;
 	nir_foreach_variable(variable, &nir->outputs) {
 		unsigned semantic_name, semantic_index;
 
-		if (nir->stage == MESA_SHADER_FRAGMENT) {
+		if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 			tgsi_get_gl_frag_result_semantic(variable->data.location,
 							 &semantic_name, &semantic_index);
+
+			/* Adjust for dual source blending */
+			if (variable->data.index > 0) {
+				semantic_index++;
+			}
 		} else {
 			tgsi_get_gl_varying_semantic(variable->data.location, true,
 						     &semantic_name, &semantic_index);
 		}
 
+		i = variable->data.driver_location;
+		if (processed_outputs & ((uint64_t)1 << i))
+			continue;
+
+		processed_outputs |= ((uint64_t)1 << i);
+		num_outputs++;
+
 		info->output_semantic_name[i] = semantic_name;
 		info->output_semantic_index[i] = semantic_index;
 		info->output_usagemask[i] = TGSI_WRITEMASK_XYZW;
 
+		unsigned num_components = 4;
+		unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type));
+		if (vector_elements)
+			num_components = vector_elements;
+
+		unsigned gs_out_streams;
+		if (variable->data.stream & (1u << 31)) {
+			gs_out_streams = variable->data.stream & ~(1u << 31);
+		} else {
+			assert(variable->data.stream < 4);
+			gs_out_streams = 0;
+			for (unsigned j = 0; j < num_components; ++j)
+				gs_out_streams |= variable->data.stream << (2 * (variable->data.location_frac + j));
+		}
+
+		unsigned streamx = gs_out_streams & 3;
+		unsigned streamy = (gs_out_streams >> 2) & 3;
+		unsigned streamz = (gs_out_streams >> 4) & 3;
+		unsigned streamw = (gs_out_streams >> 6) & 3;
+
+		if (info->output_usagemask[i] & TGSI_WRITEMASK_X) {
+			info->output_streams[i] |= streamx;
+			info->num_stream_output_components[streamx]++;
+		}
+		if (info->output_usagemask[i] & TGSI_WRITEMASK_Y) {
+			info->output_streams[i] |= streamy << 2;
+			info->num_stream_output_components[streamy]++;
+		}
+		if (info->output_usagemask[i] & TGSI_WRITEMASK_Z) {
+			info->output_streams[i] |= streamz << 4;
+			info->num_stream_output_components[streamz]++;
+		}
+		if (info->output_usagemask[i] & TGSI_WRITEMASK_W) {
+			info->output_streams[i] |= streamw << 6;
+			info->num_stream_output_components[streamw]++;
+		}
+
 		switch (semantic_name) {
 		case TGSI_SEMANTIC_PRIMID:
 			info->writes_primid = true;
@@ -273,9 +414,23 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 			break;
 		}
 
-		i++;
+		if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+			switch (semantic_name) {
+			case TGSI_SEMANTIC_PATCH:
+				info->reads_perpatch_outputs = true;
+				break;
+			case TGSI_SEMANTIC_TESSINNER:
+			case TGSI_SEMANTIC_TESSOUTER:
+				info->reads_tessfactor_outputs = true;
+				break;
+			default:
+				info->reads_pervertex_outputs = true;
+			}
+		}
 	}
 
+	info->num_outputs = num_outputs;
+
 	nir_foreach_variable(variable, &nir->uniforms) {
 		const struct glsl_type *type = variable->type;
 		enum glsl_base_type base_type =
@@ -296,8 +451,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 	info->num_written_clipdistance = nir->info.clip_distance_array_size;
 	info->num_written_culldistance = nir->info.cull_distance_array_size;
 	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
-	info->culldist_writemask = u_bit_consecutive(info->num_written_clipdistance,
-						     info->num_written_culldistance);
+	info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
 
 	if (info->processor == PIPE_SHADER_FRAGMENT)
 		info->uses_kill = nir->info.fs.uses_discard;
@@ -313,22 +467,105 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 	}
 }
 
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created.
+ */
+void
+si_lower_nir(struct si_shader_selector* sel)
+{
+	/* Adjust the driver location of inputs and outputs. The state tracker
+	 * interprets them as slots, while the ac/nir backend interprets them
+	 * as individual components.
+	 */
+	nir_foreach_variable(variable, &sel->nir->inputs)
+		variable->data.driver_location *= 4;
+
+	nir_foreach_variable(variable, &sel->nir->outputs) {
+		variable->data.driver_location *= 4;
+
+		if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+			if (variable->data.location == FRAG_RESULT_DEPTH)
+				variable->data.driver_location += 2;
+			else if (variable->data.location == FRAG_RESULT_STENCIL)
+				variable->data.driver_location += 1;
+		}
+	}
+
+	/* Perform lowerings (and optimizations) of code.
+	 *
+	 * Performance considerations aside, we must:
+	 * - lower certain ALU operations
+	 * - ensure constant offsets for texture instructions are folded
+	 *   and copy-propagated
+	 */
+	NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size,
+		   (nir_lower_io_options)0);
+	NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo);
+
+	NIR_PASS_V(sel->nir, nir_lower_returns);
+	NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+	NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
+	NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+
+	static const struct nir_lower_tex_options lower_tex_options = {
+		.lower_txp = ~0u,
+	};
+	NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+
+	const nir_lower_subgroups_options subgroups_options = {
+		.subgroup_size = 64,
+		.ballot_bit_size = 32,
+		.lower_to_scalar = true,
+		.lower_subgroup_masks = true,
+		.lower_vote_trivial = false,
+	};
+	NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
+
+	bool progress;
+	do {
+		progress = false;
+
+		/* (Constant) copy propagation is needed for txf with offsets. */
+		NIR_PASS(progress, sel->nir, nir_copy_prop);
+		NIR_PASS(progress, sel->nir, nir_opt_remove_phis);
+		NIR_PASS(progress, sel->nir, nir_opt_dce);
+		if (nir_opt_trivial_continues(sel->nir)) {
+			progress = true;
+			NIR_PASS(progress, sel->nir, nir_copy_prop);
+			NIR_PASS(progress, sel->nir, nir_opt_dce);
+		}
+		NIR_PASS(progress, sel->nir, nir_opt_if);
+		NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
+		NIR_PASS(progress, sel->nir, nir_opt_cse);
+		NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8);
+
+		/* Needed for algebraic lowering */
+		NIR_PASS(progress, sel->nir, nir_opt_algebraic);
+		NIR_PASS(progress, sel->nir, nir_opt_constant_folding);
+
+		NIR_PASS(progress, sel->nir, nir_opt_undef);
+		NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
+		if (sel->nir->options->max_unroll_iterations) {
+			NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
+		}
+	} while (progress);
+}
+
 static void declare_nir_input_vs(struct si_shader_context *ctx,
-				 struct nir_variable *variable, unsigned rel,
+				 struct nir_variable *variable,
+				 unsigned input_index,
 				 LLVMValueRef out[4])
 {
-	si_llvm_load_input_vs(ctx, variable->data.driver_location / 4 + rel, out);
+	si_llvm_load_input_vs(ctx, input_index, out);
 }
 
 static void declare_nir_input_fs(struct si_shader_context *ctx,
-				 struct nir_variable *variable, unsigned rel,
-				 unsigned *fs_attr_idx,
+				 struct nir_variable *variable,
+				 unsigned input_index,
 				 LLVMValueRef out[4])
 {
-	unsigned slot = variable->data.location + rel;
-
-	assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
-
+	unsigned slot = variable->data.location;
 	if (slot == VARYING_SLOT_POS) {
 		out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
 		out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
@@ -338,15 +575,35 @@ static void declare_nir_input_fs(struct si_shader_context *ctx,
 		return;
 	}
 
-	si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
-	(*fs_attr_idx)++;
+	si_llvm_load_input_fs(ctx, input_index, out);
+}
+
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+				  unsigned location,
+				  unsigned driver_location,
+				  unsigned component,
+				  unsigned num_components,
+				  unsigned vertex_index,
+				  unsigned const_index,
+				  LLVMTypeRef type)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+
+	LLVMValueRef value[4];
+	for (unsigned i = component; i < num_components + component; i++) {
+		value[i] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4,
+						 vertex_index, type, i);
+	}
+
+	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
 }
 
 static LLVMValueRef
 si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 			 unsigned descriptor_set, unsigned base_index,
 			 unsigned constant_index, LLVMValueRef dynamic_index,
-			 enum ac_descriptor_type desc_type)
+			 enum ac_descriptor_type desc_type, bool image,
+			 bool write)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 	LLVMBuilderRef builder = ctx->ac.builder;
@@ -362,6 +619,21 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 			     LLVMConstInt(ctx->ac.i32, base_index + constant_index, false),
 			     "");
 
+	if (image) {
+		assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
+		assert(base_index + constant_index < ctx->num_images);
+
+		if (dynamic_index)
+			index = si_llvm_bound_index(ctx, index, ctx->num_images);
+
+		index = LLVMBuildSub(ctx->gallivm.builder,
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+				     index, "");
+
+		/* TODO: be smarter about when we use dcc_off */
+		return si_load_image_desc(ctx, list, index, desc_type, write);
+	}
+
 	assert(base_index + constant_index < ctx->num_samplers);
 
 	if (dynamic_index)
@@ -373,31 +645,62 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
 	return si_load_sampler_desc(ctx, list, index, desc_type);
 }
 
+static void bitcast_inputs(struct si_shader_context *ctx,
+			   LLVMValueRef data[4],
+			   unsigned input_idx)
+{
+	for (unsigned chan = 0; chan < 4; chan++) {
+		ctx->inputs[input_idx + chan] =
+			LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
+	}
+}
+
 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
 {
-	unsigned fs_attr_idx = 0;
-	nir_foreach_variable(variable, &nir->inputs) {
-		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
-								   nir->stage == MESA_SHADER_VERTEX);
-		unsigned input_idx = variable->data.driver_location;
+	struct tgsi_shader_info *info = &ctx->shader->selector->info;
 
-		for (unsigned i = 0; i < attrib_count; ++i) {
-			LLVMValueRef data[4];
+	if (nir->info.stage == MESA_SHADER_VERTEX ||
+	    nir->info.stage == MESA_SHADER_FRAGMENT) {
+		uint64_t processed_inputs = 0;
+		nir_foreach_variable(variable, &nir->inputs) {
+			unsigned attrib_count = glsl_count_attribute_slots(variable->type,
+									   nir->info.stage == MESA_SHADER_VERTEX);
+			unsigned input_idx = variable->data.driver_location;
 
-			if (nir->stage == MESA_SHADER_VERTEX)
-				declare_nir_input_vs(ctx, variable, i, data);
-			else if (nir->stage == MESA_SHADER_FRAGMENT)
-				declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data);
+			assert(attrib_count == 1);
 
-			for (unsigned chan = 0; chan < 4; chan++) {
-				ctx->inputs[input_idx + chan] =
-					LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
+			LLVMValueRef data[4];
+			unsigned loc = variable->data.location;
+
+			/* Packed components share the same location so skip
+			 * them if we have already processed the location.
+			 */
+			if (processed_inputs & ((uint64_t)1 << loc))
+				continue;
+
+			if (nir->info.stage == MESA_SHADER_VERTEX) {
+				declare_nir_input_vs(ctx, variable, input_idx / 4, data);
+				bitcast_inputs(ctx, data, input_idx);
+				if (glsl_type_is_dual_slot(variable->type)) {
+					input_idx += 4;
+					declare_nir_input_vs(ctx, variable, input_idx / 4, data);
+					bitcast_inputs(ctx, data, input_idx);
+				}
+			} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+				declare_nir_input_fs(ctx, variable, input_idx / 4, data);
+				bitcast_inputs(ctx, data, input_idx);
 			}
+
+			processed_inputs |= ((uint64_t)1 << loc);
 		}
 	}
 
 	ctx->abi.inputs = &ctx->inputs[0];
 	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
+	ctx->abi.clamp_shadow_reference = true;
+
+	ctx->num_samplers = util_last_bit(info->samplers_declared);
+	ctx->num_images = util_last_bit(info->images_declared);
 
 	ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
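
A note on the gs_out_streams bookkeeping added to si_nir_scan_shader: each output slot carries a 2-bit geometry-stream index per component (X in bits 0-1, Y in bits 2-3, Z in bits 4-5, W in bits 6-7), written starting at variable->data.location_frac. The standalone sketch below is illustrative only, not part of the patch; the input values are made up, but the shifts and masks mirror the code above.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned stream = 2;         /* GS stream index, 0..3 */
	unsigned num_components = 3; /* e.g. a vec3 output */
	unsigned location_frac = 1;  /* packed: first used channel is Y */

	/* Pack: 2 bits per component, as in si_nir_scan_shader. */
	unsigned gs_out_streams = 0;
	for (unsigned j = 0; j < num_components; ++j)
		gs_out_streams |= stream << (2 * (location_frac + j));

	/* Unpack the per-channel stream indices, as in the patch. */
	unsigned streamx = gs_out_streams & 3;
	unsigned streamy = (gs_out_streams >> 2) & 3;
	unsigned streamz = (gs_out_streams >> 4) & 3;
	unsigned streamw = (gs_out_streams >> 6) & 3;

	/* X was never written, so it decodes as stream 0; Y/Z/W as 2. */
	printf("x=%u y=%u z=%u w=%u\n", streamx, streamy, streamz, streamw);
	assert(streamx == 0 && streamy == 2 && streamz == 2 && streamw == 2);
	return 0;
}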
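
Similarly, the processed_inputs/processed_outputs bitmasks exist because packed varyings can share a single driver_location, and each location must be counted and described only once. A minimal sketch of that deduplication (made-up data, not patch code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Three variables; the last two are packed into location 1. */
	const unsigned driver_location[] = { 0, 1, 1 };

	uint64_t processed_outputs = 0;
	unsigned num_outputs = 0;

	for (unsigned v = 0; v < 3; v++) {
		unsigned i = driver_location[v];

		/* Skip locations that have already been described. */
		if (processed_outputs & ((uint64_t)1 << i))
			continue;

		processed_outputs |= (uint64_t)1 << i;
		num_outputs++;
	}

	printf("num_outputs = %u\n", num_outputs);
	assert(num_outputs == 2); /* not 3: location 1 is counted once */
	return 0;
}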
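
Finally, si_lower_nir multiplies driver_location by 4 because the state tracker hands out locations in vec4 slots while the ac/nir backend addresses individual 32-bit components; a dual-slot (64-bit) vertex attribute therefore covers 8 components, and the fragment depth/stencil results are assumed here to live in the Z and Y channels of their slot, matching the += 2 / += 1 offsets above. A minimal sketch of that arithmetic (assumptions as stated, not patch code):

#include <stdio.h>

/* Slot index -> index of the slot's first 32-bit component. */
static unsigned slot_to_component(unsigned driver_location)
{
	return driver_location * 4;
}

int main(void)
{
	/* Slots 0, 1, 2 start at components 0, 4, 8. */
	for (unsigned slot = 0; slot < 3; slot++)
		printf("slot %u -> component %u\n", slot, slot_to_component(slot));

	/* A dual-slot (64-bit) vertex attribute in slot 1 covers
	 * components 4..11; its second half starts 4 components later,
	 * matching the input_idx += 4 step in si_nir_build_llvm.
	 */
	printf("dual-slot second half -> component %u\n", slot_to_component(1) + 4);

	/* FRAG_RESULT_DEPTH is offset to the Z channel of its slot and
	 * FRAG_RESULT_STENCIL to the Y channel (assumed channel layout).
	 */
	printf("depth -> component %u, stencil -> component %u\n",
	       slot_to_component(0) + 2, slot_to_component(0) + 1);
	return 0;
}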