X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_linking_helpers.c;h=e5cd450a014bd169701ca61cec04f0ee9d2da18f;hb=a1ed83fddd7518cc92b609d60129426bd9b4158d;hp=8d7302c93c15e4b8ab88610b5318ac797662fb93;hpb=36e000d8321814de11454c8574f2d3a8da01db8a;p=mesa.git diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 8d7302c93c1..e5cd450a014 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -50,7 +50,7 @@ get_variable_io_mask(nir_variable *var, gl_shader_stage stage) assert(var->data.location >= 0); const struct glsl_type *type = var->type; - if (nir_is_per_vertex_io(var, stage)) { + if (nir_is_per_vertex_io(var, stage) || var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -109,8 +109,7 @@ tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read) * * Example usage is: * - * progress = nir_remove_unused_io_vars(producer, - * &producer->outputs, + * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, * read, patches_read) || * progress; * @@ -120,14 +119,17 @@ tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read) * variable is used! */ bool -nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list, +nir_remove_unused_io_vars(nir_shader *shader, + nir_variable_mode mode, uint64_t *used_by_other_stage, uint64_t *used_by_other_stage_patches) { bool progress = false; uint64_t *used; - nir_foreach_variable_safe(var, var_list) { + assert(mode == nir_var_shader_in || mode == nir_var_shader_out); + + nir_foreach_variable_with_modes_safe(var, shader, mode) { if (var->data.patch) used = used_by_other_stage_patches; else @@ -149,9 +151,6 @@ nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list, var->data.location = 0; var->data.mode = nir_var_shader_temp; - exec_node_remove(&var->node); - exec_list_push_tail(&shader->globals, &var->node); - progress = true; } } @@ -171,7 +170,7 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) uint64_t read[4] = { 0 }, written[4] = { 0 }; uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 }; - nir_foreach_variable(var, &producer->outputs) { + nir_foreach_shader_out_variable(var, producer) { for (unsigned i = 0; i < get_num_components(var); i++) { if (var->data.patch) { patches_written[var->data.location_frac + i] |= @@ -183,7 +182,7 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) } } - nir_foreach_variable(var, &consumer->inputs) { + nir_foreach_shader_in_variable(var, consumer) { for (unsigned i = 0; i < get_num_components(var); i++) { if (var->data.patch) { patches_read[var->data.location_frac + i] |= @@ -203,10 +202,10 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) tcs_add_output_reads(producer, read, patches_read); bool progress = false; - progress = nir_remove_unused_io_vars(producer, &producer->outputs, read, + progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read, patches_read); - progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written, + progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written, patches_written) || progress; return progress; @@ -266,12 +265,13 @@ struct assigned_comps * TODO: allow better packing of complex types. 
*/ static void -get_unmoveable_components_masks(struct exec_list *var_list, +get_unmoveable_components_masks(nir_shader *shader, + nir_variable_mode mode, struct assigned_comps *comps, gl_shader_stage stage, bool default_to_smooth_interp) { - nir_foreach_variable_safe(var, var_list) { + nir_foreach_variable_with_modes_safe(var, shader, mode) { assert(var->data.location >= 0); /* Only remap things that aren't built-ins. */ @@ -279,7 +279,7 @@ get_unmoveable_components_masks(struct exec_list *var_list, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_per_vertex_io(var, stage)) { + if (nir_is_per_vertex_io(var, stage) || var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -357,18 +357,19 @@ mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset) } static void -remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage, +remap_slots_and_components(nir_shader *shader, nir_variable_mode mode, struct varying_loc (*remap)[4], uint64_t *slots_used, uint64_t *out_slots_read, uint32_t *p_slots_used, uint32_t *p_out_slots_read) { + const gl_shader_stage stage = shader->info.stage; uint64_t out_slots_read_tmp[2] = {0}; uint64_t slots_used_tmp[2] = {0}; /* We don't touch builtins so just copy the bitmask */ slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0); - nir_foreach_variable(var, var_list) { + nir_foreach_variable_with_modes(var, shader, mode) { assert(var->data.location >= 0); /* Only remap things that aren't built-ins */ @@ -376,7 +377,7 @@ remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage, var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { const struct glsl_type *type = var->type; - if (nir_is_per_vertex_io(var, stage)) { + if (nir_is_per_vertex_io(var, stage) || var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -443,6 +444,7 @@ struct varying_component { uint8_t interp_loc; bool is_32bit; bool is_patch; + bool is_intra_stage_only; bool initialised; }; @@ -456,6 +458,12 @@ cmp_varying_component(const void *comp1_v, const void *comp2_v) if (comp1->is_patch != comp2->is_patch) return comp1->is_patch ? 1 : -1; + /* We want to try to group together TCS outputs that are only read by other + * TCS invocations and not consumed by the follow stage. + */ + if (comp1->is_intra_stage_only != comp2->is_intra_stage_only) + return comp1->is_intra_stage_only ? 1 : -1; + /* We can only pack varyings with matching interpolation types so group * them together. */ @@ -471,7 +479,7 @@ cmp_varying_component(const void *comp1_v, const void *comp2_v) } static void -gather_varying_component_info(nir_shader *consumer, +gather_varying_component_info(nir_shader *producer, nir_shader *consumer, struct varying_component **varying_comp_info, unsigned *varying_comp_info_size, bool default_to_smooth_interp) @@ -482,7 +490,7 @@ gather_varying_component_info(nir_shader *consumer, /* Count the number of varying that can be packed and create a mapping * of those varyings to the array we will pass to qsort. */ - nir_foreach_variable(var, &consumer->inputs) { + nir_foreach_shader_out_variable(var, producer) { /* Only remap things that aren't builtins. 
*/ if (var->data.location >= VARYING_SLOT_VAR0 && @@ -493,7 +501,7 @@ gather_varying_component_info(nir_shader *consumer, continue; const struct glsl_type *type = var->type; - if (nir_is_per_vertex_io(var, consumer->info.stage)) { + if (nir_is_per_vertex_io(var, producer->info.stage) || var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -523,7 +531,8 @@ gather_varying_component_info(nir_shader *consumer, if (intr->intrinsic != nir_intrinsic_load_deref && intr->intrinsic != nir_intrinsic_interp_deref_at_centroid && intr->intrinsic != nir_intrinsic_interp_deref_at_sample && - intr->intrinsic != nir_intrinsic_interp_deref_at_offset) + intr->intrinsic != nir_intrinsic_interp_deref_at_offset && + intr->intrinsic != nir_intrinsic_interp_deref_at_vertex) continue; nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); @@ -549,7 +558,8 @@ gather_varying_component_info(nir_shader *consumer, if (!vc_info->initialised) { const struct glsl_type *type = in_var->type; - if (nir_is_per_vertex_io(in_var, consumer->info.stage)) { + if (nir_is_per_vertex_io(in_var, consumer->info.stage) || + in_var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } @@ -560,9 +570,86 @@ gather_varying_component_info(nir_shader *consumer, vc_info->interp_loc = get_interp_loc(in_var); vc_info->is_32bit = glsl_type_is_32bit(type); vc_info->is_patch = in_var->data.patch; + vc_info->is_intra_stage_only = false; + vc_info->initialised = true; } } } + + /* Walk over the shader and populate the varying component info array + * for varyings which are read by other TCS instances but are not consumed + * by the TES. + */ + if (producer->info.stage == MESA_SHADER_TESS_CTRL) { + impl = nir_shader_get_entrypoint(producer); + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_deref) + continue; + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + if (deref->mode != nir_var_shader_out) + continue; + + /* We only remap things that aren't builtins. */ + nir_variable *out_var = nir_deref_instr_get_variable(deref); + if (out_var->data.location < VARYING_SLOT_VAR0) + continue; + + unsigned location = out_var->data.location - VARYING_SLOT_VAR0; + if (location >= MAX_VARYINGS_INCL_PATCH) + continue; + + unsigned var_info_idx = + store_varying_info_idx[location][out_var->data.location_frac]; + if (!var_info_idx) { + /* Something went wrong, the shader interfaces didn't match, so + * abandon packing. This can happen for example when the + * inputs are scalars but the outputs are struct members. 
+ */ + *varying_comp_info_size = 0; + break; + } + + struct varying_component *vc_info = + &(*varying_comp_info)[var_info_idx-1]; + + if (!vc_info->initialised) { + const struct glsl_type *type = out_var->type; + if (nir_is_per_vertex_io(out_var, producer->info.stage)) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); + } + + vc_info->var = out_var; + vc_info->interp_type = + get_interp_type(out_var, type, default_to_smooth_interp); + vc_info->interp_loc = get_interp_loc(out_var); + vc_info->is_32bit = glsl_type_is_32bit(type); + vc_info->is_patch = out_var->data.patch; + vc_info->is_intra_stage_only = true; + vc_info->initialised = true; + } + } + } + } + + for (unsigned i = 0; i < *varying_comp_info_size; i++ ) { + struct varying_component *vc_info = &(*varying_comp_info)[i]; + if (!vc_info->initialised) { + /* Something went wrong, the shader interfaces didn't match, so + * abandon packing. This can happen for example when the outputs are + * scalars but the inputs are struct members. + */ + *varying_comp_info_size = 0; + break; + } + } } static void @@ -640,14 +727,12 @@ compact_components(nir_shader *producer, nir_shader *consumer, struct assigned_comps *assigned_comps, bool default_to_smooth_interp) { - struct exec_list *input_list = &consumer->inputs; - struct exec_list *output_list = &producer->outputs; struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}}; struct varying_component *varying_comp_info; unsigned varying_comp_info_size; /* Gather varying component info */ - gather_varying_component_info(consumer, &varying_comp_info, + gather_varying_component_info(producer, consumer, &varying_comp_info, &varying_comp_info_size, default_to_smooth_interp); @@ -699,10 +784,10 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint64_t zero = 0; uint32_t zero32 = 0; - remap_slots_and_components(input_list, consumer->info.stage, remap, + remap_slots_and_components(consumer, nir_var_shader_in, remap, &consumer->info.inputs_read, &zero, &consumer->info.patch_inputs_read, &zero32); - remap_slots_and_components(output_list, producer->info.stage, remap, + remap_slots_and_components(producer, nir_var_shader_out, remap, &producer->info.outputs_written, &producer->info.outputs_read, &producer->info.patch_outputs_written, @@ -728,10 +813,12 @@ nir_compact_varyings(nir_shader *producer, nir_shader *consumer, struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}}; - get_unmoveable_components_masks(&producer->outputs, assigned_comps, + get_unmoveable_components_masks(producer, nir_var_shader_out, + assigned_comps, producer->info.stage, default_to_smooth_interp); - get_unmoveable_components_masks(&consumer->inputs, assigned_comps, + get_unmoveable_components_masks(consumer, nir_var_shader_in, + assigned_comps, consumer->info.stage, default_to_smooth_interp); @@ -748,7 +835,7 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer) { nir_variable *input_vars[MAX_VARYING] = { 0 }; - nir_foreach_variable(var, &consumer->inputs) { + nir_foreach_shader_in_variable(var, consumer) { if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) { @@ -757,7 +844,7 @@ nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer) } } - nir_foreach_variable(var, &producer->outputs) { + nir_foreach_shader_out_variable(var, producer) { if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) { @@ -782,7 +869,7 @@ does_varying_match(nir_variable 
*out_var, nir_variable *in_var) static nir_variable * get_matching_input_var(nir_shader *consumer, nir_variable *out_var) { - nir_foreach_variable(var, &consumer->inputs) { + nir_foreach_shader_in_variable(var, consumer) { if (does_varying_match(out_var, var)) return var; } @@ -976,7 +1063,7 @@ nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer) static void insert_sorted(struct exec_list *var_list, nir_variable *new_var) { - nir_foreach_variable(var, var_list) { + nir_foreach_variable_in_list(var, var_list) { if (var->data.location > new_var->data.location) { exec_node_insert_node_before(&var->node, &new_var->node); return; @@ -986,41 +1073,55 @@ insert_sorted(struct exec_list *var_list, nir_variable *new_var) } static void -sort_varyings(struct exec_list *var_list) +sort_varyings(nir_shader *shader, nir_variable_mode mode, + struct exec_list *sorted_list) { - struct exec_list new_list; - exec_list_make_empty(&new_list); - nir_foreach_variable_safe(var, var_list) { + exec_list_make_empty(sorted_list); + nir_foreach_variable_with_modes_safe(var, shader, mode) { exec_node_remove(&var->node); - insert_sorted(&new_list, var); + insert_sorted(sorted_list, var); } - exec_list_move_nodes_to(&new_list, var_list); } void -nir_assign_io_var_locations(struct exec_list *var_list, unsigned *size, - gl_shader_stage stage) +nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode, + unsigned *size, gl_shader_stage stage) { unsigned location = 0; unsigned assigned_locations[VARYING_SLOT_TESS_MAX]; uint64_t processed_locs[2] = {0}; - sort_varyings(var_list); - - const int base = stage == MESA_SHADER_FRAGMENT ? - (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0; + struct exec_list io_vars; + sort_varyings(shader, mode, &io_vars); int UNUSED last_loc = 0; bool last_partial = false; - nir_foreach_variable(var, var_list) { + nir_foreach_variable_in_list(var, &io_vars) { const struct glsl_type *type = var->type; - if (nir_is_per_vertex_io(var, stage)) { + if (nir_is_per_vertex_io(var, stage) || var->data.per_view) { assert(glsl_type_is_array(type)); type = glsl_get_array_element(type); } + int base; + if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX) + base = VERT_ATTRIB_GENERIC0; + else if (var->data.mode == nir_var_shader_out && + stage == MESA_SHADER_FRAGMENT) + base = FRAG_RESULT_DATA0; + else + base = VARYING_SLOT_VAR0; + unsigned var_size; if (var->data.compact) { + /* If we are inside a partial compact, + * don't allow another compact to be in this slot + * if it starts at component 0. + */ + if (last_partial && var->data.location_frac == 0) { + location++; + } + /* compact variables must be arrays of scalars */ assert(glsl_type_is_array(type)); assert(glsl_type_is_scalar(glsl_get_array_element(type))); @@ -1098,6 +1199,106 @@ nir_assign_io_var_locations(struct exec_list *var_list, unsigned *size, if (last_partial) location++; + exec_list_append(&shader->variables, &io_vars); *size = location; } +static uint64_t +get_linked_variable_location(unsigned location, bool patch) +{ + if (!patch) + return location; + + /* Reserve locations 0...3 for special patch variables + * like tess factors and bounding boxes, and the generic patch + * variables will come after them. 
+ */ + if (location >= VARYING_SLOT_PATCH0) + return location - VARYING_SLOT_PATCH0 + 4; + else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER && + location <= VARYING_SLOT_BOUNDING_BOX1) + return location - VARYING_SLOT_TESS_LEVEL_OUTER; + else + unreachable("Unsupported variable in get_linked_variable_location."); +} + +static uint64_t +get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage) +{ + const struct glsl_type *type = variable->type; + + if (nir_is_per_vertex_io(variable, stage)) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); + } + + unsigned slots = glsl_count_attribute_slots(type, false); + if (variable->data.compact) { + unsigned component_count = variable->data.location_frac + glsl_get_length(type); + slots = DIV_ROUND_UP(component_count, 4); + } + + uint64_t mask = u_bit_consecutive64(0, slots); + return mask; +} + +nir_linked_io_var_info +nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer) +{ + assert(producer); + assert(consumer); + + uint64_t producer_output_mask = 0; + uint64_t producer_patch_output_mask = 0; + + nir_foreach_shader_out_variable(variable, producer) { + uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage); + uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); + + if (variable->data.patch) + producer_patch_output_mask |= mask << loc; + else + producer_output_mask |= mask << loc; + } + + uint64_t consumer_input_mask = 0; + uint64_t consumer_patch_input_mask = 0; + + nir_foreach_shader_in_variable(variable, consumer) { + uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage); + uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); + + if (variable->data.patch) + consumer_patch_input_mask |= mask << loc; + else + consumer_input_mask |= mask << loc; + } + + uint64_t io_mask = producer_output_mask | consumer_input_mask; + uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask; + + nir_foreach_shader_out_variable(variable, producer) { + uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); + + if (variable->data.patch) + variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4; + else + variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4; + } + + nir_foreach_shader_in_variable(variable, consumer) { + uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); + + if (variable->data.patch) + variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)) * 4; + else + variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)) * 4; + } + + nir_linked_io_var_info result = { + .num_linked_io_vars = util_bitcount64(io_mask), + .num_linked_patch_io_vars = util_bitcount64(patch_io_mask), + }; + + return result; +}
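
For context, a minimal usage sketch (not part of the patch above) of how a driver's linking code might call these helpers after this interface change, now that variables are selected by nir_variable_mode rather than by passing the old shader->inputs / shader->outputs exec_lists. The helper name link_io_example() and the exact call order are illustrative assumptions; the NIR functions and signatures used are the ones introduced or updated in the diff.

#include "nir.h"

/* Hypothetical driver-side linking step; a sketch, not the patch's code. */
static void
link_io_example(nir_shader *producer, nir_shader *consumer)
{
   /* Drop varyings the consumer never reads (and inputs the producer never
    * writes).  Returns true on progress.  With this patch the dead variables
    * are only demoted to nir_var_shader_temp; a dead-variable pass
    * (nir_remove_dead_variables) is expected to delete them afterwards. */
   nir_remove_unused_varyings(producer, consumer);

   /* Pack the remaining varyings into as few slots/components as possible. */
   nir_compact_varyings(producer, consumer,
                        false /* default_to_smooth_interp */);

   /* Assign driver_locations.  Note the new mode-based signature: the pass
    * walks the shader's variables of the given mode instead of taking an
    * exec_list argument. */
   unsigned num_producer_outputs = 0, num_consumer_inputs = 0;
   nir_assign_io_var_locations(producer, nir_var_shader_out,
                               &num_producer_outputs, producer->info.stage);
   nir_assign_io_var_locations(consumer, nir_var_shader_in,
                               &num_consumer_inputs, consumer->info.stage);

   /* Alternatively, the new nir_assign_linked_io_var_locations() gives both
    * stages one merged layout and reports the number of live (patch) slots:
    *
    *    nir_linked_io_var_info io =
    *       nir_assign_linked_io_var_locations(producer, consumer);
    */
}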