X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fglsl%2Flink_varyings.cpp;h=4e00840c53e84b77c76d6e623ff072c3a7f76c3a;hb=c81fbb42d94293e78e9c767bb00ad22855f9e0b0;hp=e6947692affd2a5e5bc349733c883b6f0ce8cf29;hpb=0c8898dc34b1b8efc12e7491c303249bd081253d;p=mesa.git diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index e6947692aff..4e00840c53e 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -29,6 +29,7 @@ */ +#include "main/errors.h" #include "main/mtypes.h" #include "glsl_symbol_table.h" #include "glsl_parser_extras.h" @@ -37,6 +38,7 @@ #include "link_varyings.h" #include "main/macros.h" #include "util/hash_table.h" +#include "util/u_math.h" #include "program.h" @@ -77,7 +79,7 @@ create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, NULL, NULL, varying_names); - } else if (t->is_record()) { + } else if (t->is_struct()) { for (unsigned i = 0; i < t->length; i++) { const char *field = t->fields.structure[i].name; size_t new_length = name_length; @@ -88,7 +90,7 @@ create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, new_length, count, NULL, NULL, varying_names); } - } else if (t->without_array()->is_record() || + } else if (t->without_array()->is_struct() || t->without_array()->is_interface() || (t->is_array() && t->fields.array->is_array())) { for (unsigned i = 0; i < t->length; i++) { @@ -165,10 +167,12 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, if (var->data.from_named_ifc_block) { type = var->get_interface_type(); + /* Find the member type before it was altered by lowering */ + const glsl_type *type_wa = type->without_array(); member_type = - type->fields.structure[type->field_index(var->name)].type; - name = ralloc_strdup(NULL, type->without_array()->name); + 
type_wa->fields.structure[type_wa->field_index(var->name)].type; + name = ralloc_strdup(NULL, type_wa->name); } else { type = var->type; member_type = NULL; @@ -184,31 +188,13 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, return has_xfb_qualifiers; } -static bool -anonymous_struct_type_matches(const glsl_type *output_type, - const glsl_type *to_match) -{ - while (output_type->is_array() && to_match->is_array()) { - /* if the lengths at each level don't match fail. */ - if (output_type->length != to_match->length) - return false; - output_type = output_type->fields.array; - to_match = to_match->fields.array; - } - - if (output_type->is_array() || to_match->is_array()) - return false; - return output_type->is_anonymous() && - to_match->is_anonymous() && - to_match->record_compare(output_type); -} - /** * Validate the types and qualifiers of an output from one stage against the * matching input to another stage. */ static void -cross_validate_types_and_qualifiers(struct gl_shader_program *prog, +cross_validate_types_and_qualifiers(struct gl_context *ctx, + struct gl_shader_program *prog, const ir_variable *input, const ir_variable *output, gl_shader_stage consumer_stage, @@ -228,38 +214,51 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, } if (type_to_match != output->type) { - /* There is a bit of a special case for gl_TexCoord. This - * built-in is unsized by default. Applications that variable - * access it must redeclare it with a size. There is some - * language in the GLSL spec that implies the fragment shader - * and vertex shader do not have to agree on this size. Other - * driver behave this way, and one or two applications seem to - * rely on it. - * - * Neither declaration needs to be modified here because the array - * sizes are fixed later when update_array_sizes is called. 
- * - * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: - * - * "Unlike user-defined varying variables, the built-in - * varying variables don't have a strict one-to-one - * correspondence between the vertex language and the - * fragment language." - */ - if (!output->type->is_array() || !is_gl_identifier(output->name)) { - bool anon_matches = anonymous_struct_type_matches(output->type, type_to_match); - - if (!anon_matches) { + if (output->type->is_struct()) { + /* Structures across shader stages can have different name + * and considered to match in type if and only if structure + * members match in name, type, qualification, and declaration + * order. + */ + if (!output->type->record_compare(type_to_match, false, true)) { linker_error(prog, - "%s shader output `%s' declared as type `%s', " - "but %s shader input declared as type `%s'\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - output->type->name, - _mesa_shader_stage_to_string(consumer_stage), - input->type->name); - return; + "%s shader output `%s' declared as struct `%s', " + "doesn't match in type with %s shader input " + "declared as struct `%s'\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + output->type->name, + _mesa_shader_stage_to_string(consumer_stage), + input->type->name); } + } else if (!output->type->is_array() || !is_gl_identifier(output->name)) { + /* There is a bit of a special case for gl_TexCoord. This + * built-in is unsized by default. Applications that variable + * access it must redeclare it with a size. There is some + * language in the GLSL spec that implies the fragment shader + * and vertex shader do not have to agree on this size. Other + * driver behave this way, and one or two applications seem to + * rely on it. + * + * Neither declaration needs to be modified here because the array + * sizes are fixed later when update_array_sizes is called. 
+ * + * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: + * + * "Unlike user-defined varying variables, the built-in + * varying variables don't have a strict one-to-one + * correspondence between the vertex language and the + * fragment language." + */ + linker_error(prog, + "%s shader output `%s' declared as type `%s', " + "but %s shader input declared as type `%s'\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + output->type->name, + _mesa_shader_stage_to_string(consumer_stage), + input->type->name); + return; } } @@ -327,16 +326,16 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * "The invariance of varyings that are declared in both the vertex * and fragment shaders must match." */ - if (input->data.invariant != output->data.invariant && + if (input->data.explicit_invariant != output->data.explicit_invariant && prog->data->Version < (prog->IsES ? 300 : 430)) { linker_error(prog, "%s shader output `%s' %s invariant qualifier, " "but %s shader input %s invariant qualifier\n", _mesa_shader_stage_to_string(producer_stage), output->name, - (output->data.invariant) ? "has" : "lacks", + (output->data.explicit_invariant) ? "has" : "lacks", _mesa_shader_stage_to_string(consumer_stage), - (input->data.invariant) ? "has" : "lacks"); + (input->data.explicit_invariant) ? "has" : "lacks"); return; } @@ -348,20 +347,48 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * "It is a link-time error if, within the same stage, the interpolation * qualifiers of variables of the same name do not match. * + * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: + * + * "When no interpolation qualifier is present, smooth interpolation + * is used." + * + * So we match variables where one is smooth and the other has no explicit + * qualifier. 
*/ - if (input->data.interpolation != output->data.interpolation && + unsigned input_interpolation = input->data.interpolation; + unsigned output_interpolation = output->data.interpolation; + if (prog->IsES) { + if (input_interpolation == INTERP_MODE_NONE) + input_interpolation = INTERP_MODE_SMOOTH; + if (output_interpolation == INTERP_MODE_NONE) + output_interpolation = INTERP_MODE_SMOOTH; + } + if (input_interpolation != output_interpolation && prog->data->Version < 440) { - linker_error(prog, - "%s shader output `%s' specifies %s " - "interpolation qualifier, " - "but %s shader input specifies %s " - "interpolation qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - interpolation_string(output->data.interpolation), - _mesa_shader_stage_to_string(consumer_stage), - interpolation_string(input->data.interpolation)); - return; + if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) { + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + return; + } else { + linker_warning(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + } } } @@ -369,7 +396,8 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * Validate front and back color outputs against single color input */ static void -cross_validate_front_and_back_color(struct gl_shader_program *prog, +cross_validate_front_and_back_color(struct 
gl_context *ctx, + struct gl_shader_program *prog, const ir_variable *input, const ir_variable *front_color, const ir_variable *back_color, @@ -377,25 +405,342 @@ cross_validate_front_and_back_color(struct gl_shader_program *prog, gl_shader_stage producer_stage) { if (front_color != NULL && front_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, front_color, + cross_validate_types_and_qualifiers(ctx, prog, input, front_color, consumer_stage, producer_stage); if (back_color != NULL && back_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, back_color, + cross_validate_types_and_qualifiers(ctx, prog, input, back_color, consumer_stage, producer_stage); } +static unsigned +compute_variable_location_slot(ir_variable *var, gl_shader_stage stage) +{ + unsigned location_start = VARYING_SLOT_VAR0; + + switch (stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_in) + location_start = VERT_ATTRIB_GENERIC0; + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + if (var->data.patch) + location_start = VARYING_SLOT_PATCH0; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_out) + location_start = FRAG_RESULT_DATA0; + break; + default: + break; + } + + return var->data.location - location_start; +} + +struct explicit_location_info { + ir_variable *var; + bool base_type_is_integer; + unsigned base_type_bit_size; + unsigned interpolation; + bool centroid; + bool sample; + bool patch; +}; + +static bool +check_location_aliasing(struct explicit_location_info explicit_locations[][4], + ir_variable *var, + unsigned location, + unsigned component, + unsigned location_limit, + const glsl_type *type, + unsigned interpolation, + bool centroid, + bool sample, + bool patch, + gl_shader_program *prog, + gl_shader_stage stage) +{ + unsigned last_comp; + unsigned base_type_bit_size; + const glsl_type *type_without_array = type->without_array(); + const bool base_type_is_integer = + 
glsl_base_type_is_integer(type_without_array->base_type); + const bool is_struct = type_without_array->is_struct(); + if (is_struct) { + /* structs don't have a defined underlying base type so just treat all + * component slots as used and set the bit size to 0. If there is + * location aliasing, we'll fail anyway later. + */ + last_comp = 4; + base_type_bit_size = 0; + } else { + unsigned dmul = type_without_array->is_64bit() ? 2 : 1; + last_comp = component + type_without_array->vector_elements * dmul; + base_type_bit_size = + glsl_base_type_get_bit_size(type_without_array->base_type); + } + + while (location < location_limit) { + unsigned comp = 0; + while (comp < 4) { + struct explicit_location_info *info = + &explicit_locations[location][comp]; + + if (info->var) { + if (info->var->type->without_array()->is_struct() || is_struct) { + /* Structs cannot share location since they are incompatible + * with any other underlying numerical type. + */ + linker_error(prog, + "%s shader has multiple %sputs sharing the " + "same location that don't have the same " + "underlying numerical type. Struct variable '%s', " + "location %u\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? "in" : "out", + is_struct ? var->name : info->var->name, + location); + return false; + } else if (comp >= component && comp < last_comp) { + /* Component aliasing is not allowed */ + linker_error(prog, + "%s shader has multiple %sputs explicitly " + "assigned to location %d and component %d\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? "in" : "out", + location, comp); + return false; + } else { + /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout + * Qualifiers, Page 67, (Location aliasing): + * + * " Further, when location aliasing, the aliases sharing the + * location must have the same underlying numerical type + * and bit width (floating-point or integer, 32-bit versus + * 64-bit, etc.) 
and the same auxiliary storage and + * interpolation qualification." + */ + + /* If the underlying numerical type isn't integer, implicitly + * it will be float or else we would have failed by now. + */ + if (info->base_type_is_integer != base_type_is_integer) { + linker_error(prog, + "%s shader has multiple %sputs sharing the " + "same location that don't have the same " + "underlying numerical type. Location %u " + "component %u.\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? + "in" : "out", location, comp); + return false; + } + + if (info->base_type_bit_size != base_type_bit_size) { + linker_error(prog, + "%s shader has multiple %sputs sharing the " + "same location that don't have the same " + "underlying numerical bit size. Location %u " + "component %u.\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? + "in" : "out", location, comp); + return false; + } + + if (info->interpolation != interpolation) { + linker_error(prog, + "%s shader has multiple %sputs sharing the " + "same location that don't have the same " + "interpolation qualification. Location %u " + "component %u.\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? + "in" : "out", location, comp); + return false; + } + + if (info->centroid != centroid || + info->sample != sample || + info->patch != patch) { + linker_error(prog, + "%s shader has multiple %sputs sharing the " + "same location that don't have the same " + "auxiliary storage qualification. Location %u " + "component %u.\n", + _mesa_shader_stage_to_string(stage), + var->data.mode == ir_var_shader_in ? 
+ "in" : "out", location, comp); + return false; + } + } + } else if (comp >= component && comp < last_comp) { + info->var = var; + info->base_type_is_integer = base_type_is_integer; + info->base_type_bit_size = base_type_bit_size; + info->interpolation = interpolation; + info->centroid = centroid; + info->sample = sample; + info->patch = patch; + } + + comp++; + + /* We need to do some special handling for doubles as dvec3 and + * dvec4 consume two consecutive locations. We don't need to + * worry about components beginning at anything other than 0 as + * the spec does not allow this for dvec3 and dvec4. + */ + if (comp == 4 && last_comp > 4) { + last_comp = last_comp - 4; + /* Bump location index and reset the component index */ + location++; + comp = 0; + component = 0; + } + } + + location++; + } + + return true; +} + +static bool +validate_explicit_variable_location(struct gl_context *ctx, + struct explicit_location_info explicit_locations[][4], + ir_variable *var, + gl_shader_program *prog, + gl_linked_shader *sh) +{ + const glsl_type *type = get_varying_type(var, sh->Stage); + unsigned num_elements = type->count_attribute_slots(false); + unsigned idx = compute_variable_location_slot(var, sh->Stage); + unsigned slot_limit = idx + num_elements; + + /* Vertex shader inputs and fragment shader outputs are validated in + * assign_attribute_or_color_locations() so we should not attempt to + * validate them again here. 
+ */ + unsigned slot_max; + if (var->data.mode == ir_var_shader_out) { + assert(sh->Stage != MESA_SHADER_FRAGMENT); + slot_max = + ctx->Const.Program[sh->Stage].MaxOutputComponents / 4; + } else { + assert(var->data.mode == ir_var_shader_in); + assert(sh->Stage != MESA_SHADER_VERTEX); + slot_max = + ctx->Const.Program[sh->Stage].MaxInputComponents / 4; + } + + if (slot_limit > slot_max) { + linker_error(prog, + "Invalid location %u in %s shader\n", + idx, _mesa_shader_stage_to_string(sh->Stage)); + return false; + } + + const glsl_type *type_without_array = type->without_array(); + if (type_without_array->is_interface()) { + for (unsigned i = 0; i < type_without_array->length; i++) { + glsl_struct_field *field = &type_without_array->fields.structure[i]; + unsigned field_location = field->location - + (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0); + if (!check_location_aliasing(explicit_locations, var, + field_location, + 0, field_location + 1, + field->type, + field->interpolation, + field->centroid, + field->sample, + field->patch, + prog, sh->Stage)) { + return false; + } + } + } else if (!check_location_aliasing(explicit_locations, var, + idx, var->data.location_frac, + slot_limit, type, + var->data.interpolation, + var->data.centroid, + var->data.sample, + var->data.patch, + prog, sh->Stage)) { + return false; + } + + return true; +} + +/** + * Validate explicit locations for the inputs to the first stage and the + * outputs of the last stage in a program, if those are not the VS and FS + * shaders. 
+ */ +void +validate_first_and_last_interface_explicit_locations(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader_stage first_stage, + gl_shader_stage last_stage) +{ + /* VS inputs and FS outputs are validated in + * assign_attribute_or_color_locations() + */ + bool validate_first_stage = first_stage != MESA_SHADER_VERTEX; + bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT; + if (!validate_first_stage && !validate_last_stage) + return; + + struct explicit_location_info explicit_locations[MAX_VARYING][4]; + + gl_shader_stage stages[2] = { first_stage, last_stage }; + bool validate_stage[2] = { validate_first_stage, validate_last_stage }; + ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out }; + + for (unsigned i = 0; i < 2; i++) { + if (!validate_stage[i]) + continue; + + gl_shader_stage stage = stages[i]; + + gl_linked_shader *sh = prog->_LinkedShaders[stage]; + assert(sh); + + memset(explicit_locations, 0, sizeof(explicit_locations)); + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || + !var->data.explicit_location || + var->data.location < VARYING_SLOT_VAR0 || + var->data.mode != var_direction[i]) + continue; + + if (!validate_explicit_variable_location( + ctx, explicit_locations, var, prog, sh)) { + return; + } + } + } +} + /** * Validate that outputs from one stage match inputs of another */ void -cross_validate_outputs_to_inputs(struct gl_shader_program *prog, +cross_validate_outputs_to_inputs(struct gl_context *ctx, + struct gl_shader_program *prog, gl_linked_shader *producer, gl_linked_shader *consumer) { glsl_symbol_table parameters; - ir_variable *explicit_locations[MAX_VARYINGS_INCL_PATCH][4] = - { {NULL, NULL} }; + struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {}; + struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {}; /* Find all shader outputs in the "producer" stage. 
*/ @@ -412,66 +757,10 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, /* User-defined varyings with explicit locations are handled * differently because they do not need to have matching names. */ - const glsl_type *type = get_varying_type(var, producer->Stage); - unsigned num_elements = type->count_attribute_slots(false); - unsigned idx = var->data.location - VARYING_SLOT_VAR0; - unsigned slot_limit = idx + num_elements; - unsigned last_comp; - - if (type->without_array()->is_record()) { - /* The component qualifier can't be used on structs so just treat - * all component slots as used. - */ - last_comp = 4; - } else { - unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; - last_comp = var->data.location_frac + - type->without_array()->vector_elements * dmul; - } - - while (idx < slot_limit) { - unsigned i = var->data.location_frac; - while (i < last_comp) { - if (explicit_locations[idx][i] != NULL) { - linker_error(prog, - "%s shader has multiple outputs explicitly " - "assigned to location %d and component %d\n", - _mesa_shader_stage_to_string(producer->Stage), - idx, var->data.location_frac); - return; - } - - /* Make sure all component at this location have the same type. - */ - for (unsigned j = 0; j < 4; j++) { - if (explicit_locations[idx][j] && - (explicit_locations[idx][j]->type->without_array() - ->base_type != type->without_array()->base_type)) { - linker_error(prog, - "Varyings sharing the same location must " - "have the same underlying numerical type. " - "Location %u component %u\n", idx, - var->data.location_frac); - return; - } - } - - explicit_locations[idx][i] = var; - i++; - - /* We need to do some special handling for doubles as dvec3 and - * dvec4 consume two consecutive locations. We don't need to - * worry about components beginning at anything other than 0 as - * the spec does not allow this for dvec3 and dvec4. 
- */ - if (i == 4 && last_comp > 4) { - last_comp = last_comp - 4; - /* Bump location index and reset the component index */ - idx++; - i = 0; - } - } - idx++; + if (!validate_explicit_variable_location(ctx, + output_explicit_locations, + var, prog, producer)) { + return; } } } @@ -498,7 +787,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const ir_variable *const back_color = parameters.get_variable("gl_BackColor"); - cross_validate_front_and_back_color(prog, input, + cross_validate_front_and_back_color(ctx, prog, input, front_color, back_color, consumer->Stage, producer->Stage); } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { @@ -508,7 +797,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const ir_variable *const back_color = parameters.get_variable("gl_BackSecondaryColor"); - cross_validate_front_and_back_color(prog, input, + cross_validate_front_and_back_color(ctx, prog, input, front_color, back_color, consumer->Stage, producer->Stage); } else { @@ -523,14 +812,40 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const glsl_type *type = get_varying_type(input, consumer->Stage); unsigned num_elements = type->count_attribute_slots(false); - unsigned idx = input->data.location - VARYING_SLOT_VAR0; + unsigned idx = + compute_variable_location_slot(input, consumer->Stage); unsigned slot_limit = idx + num_elements; + if (!validate_explicit_variable_location(ctx, + input_explicit_locations, + input, prog, consumer)) { + return; + } + while (idx < slot_limit) { - output = explicit_locations[idx][input->data.location_frac]; + if (idx >= MAX_VARYING) { + linker_error(prog, + "Invalid location %u in %s shader\n", idx, + _mesa_shader_stage_to_string(consumer->Stage)); + return; + } + + output = output_explicit_locations[idx][input->data.location_frac].var; - if (output == NULL || - input->data.location != output->data.location) { + if (output == NULL) { + /* A linker failure should 
only happen when there is no + * output declaration and there is Static Use of the + * declared input. + */ + if (input->data.used) { + linker_error(prog, + "%s shader input `%s' with explicit location " + "has no matching output\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); + break; + } + } else if (input->data.location != output->data.location) { linker_error(prog, "%s shader input `%s' with explicit location " "has no matching output\n", @@ -550,7 +865,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ if (!(input->get_interface_type() && output->get_interface_type())) - cross_validate_types_and_qualifiers(prog, input, output, + cross_validate_types_and_qualifiers(ctx, prog, input, output, consumer->Stage, producer->Stage); } else { @@ -560,7 +875,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ assert(!input->data.assigned); if (input->data.used && !input->get_interface_type() && - !input->data.explicit_location && !prog->SeparateShader) + !input->data.explicit_location) linker_error(prog, "%s shader input `%s' " "has no matching output in the previous stage\n", @@ -854,8 +1169,10 @@ bool tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, struct gl_transform_feedback_info *info, unsigned buffer, unsigned buffer_index, - const unsigned max_outputs, bool *explicit_stride, - bool has_xfb_qualifiers) const + const unsigned max_outputs, + BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS], + bool *explicit_stride, bool has_xfb_qualifiers, + const void* mem_ctx) const { unsigned xfb_offset = 0; unsigned size = this->size; @@ -882,6 +1199,72 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, unsigned location = this->location; unsigned location_frac = this->location_frac; unsigned num_components = this->num_components(); + + /* From GL_EXT_transform_feedback: + * + * " A program will fail to link if: + * + * * the total number of components to 
capture is greater than the + * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT + * and the buffer mode is INTERLEAVED_ATTRIBS_EXT." + * + * From GL_ARB_enhanced_layouts: + * + * " The resulting stride (implicit or explicit) must be less than or + * equal to the implementation-dependent constant + * gl_MaxTransformFeedbackInterleavedComponents." + */ + if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || + has_xfb_qualifiers) && + xfb_offset + num_components > + ctx->Const.MaxTransformFeedbackInterleavedComponents) { + linker_error(prog, + "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " + "limit has been exceeded."); + return false; + } + + /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers, + * Page 76, (Transform Feedback Layout Qualifiers): + * + * " No aliasing in output buffers is allowed: It is a compile-time or + * link-time error to specify variables with overlapping transform + * feedback offsets." + */ + const unsigned max_components = + ctx->Const.MaxTransformFeedbackInterleavedComponents; + const unsigned first_component = xfb_offset; + const unsigned last_component = xfb_offset + num_components - 1; + const unsigned start_word = BITSET_BITWORD(first_component); + const unsigned end_word = BITSET_BITWORD(last_component); + BITSET_WORD *used; + assert(last_component < max_components); + + if (!used_components[buffer]) { + used_components[buffer] = + rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components)); + } + used = used_components[buffer]; + + for (unsigned word = start_word; word <= end_word; word++) { + unsigned start_range = 0; + unsigned end_range = BITSET_WORDBITS - 1; + + if (word == start_word) + start_range = first_component % BITSET_WORDBITS; + + if (word == end_word) + end_range = last_component % BITSET_WORDBITS; + + if (used[word] & BITSET_RANGE(start_range, end_range)) { + linker_error(prog, + "variable '%s', xfb_offset (%d) is causing aliasing.", + this->orig_name, xfb_offset 
* 4); + return false; + } + used[word] |= BITSET_RANGE(start_range, end_range); + } + while (num_components > 0) { unsigned output_size = MIN2(num_components, 4 - location_frac); assert((info->NumOutputs == 0 && max_outputs == 0) || @@ -922,8 +1305,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, return false; } - if ((this->offset / 4) / info->Buffers[buffer].Stride != - (xfb_offset - 1) / info->Buffers[buffer].Stride) { + if (xfb_offset > info->Buffers[buffer].Stride) { linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " "buffer (%d)", xfb_offset * 4, info->Buffers[buffer].Stride * 4, buffer); @@ -933,28 +1315,6 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, info->Buffers[buffer].Stride = xfb_offset; } - /* From GL_EXT_transform_feedback: - * A program will fail to link if: - * - * * the total number of components to capture is greater than - * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT - * and the buffer mode is INTERLEAVED_ATTRIBS_EXT. - * - * From GL_ARB_enhanced_layouts: - * - * "The resulting stride (implicit or explicit) must be less than or - * equal to the implementation-dependent constant - * gl_MaxTransformFeedbackInterleavedComponents." - */ - if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || - has_xfb_qualifiers) && - info->Buffers[buffer].Stride > - ctx->Const.MaxTransformFeedbackInterleavedComponents) { - linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " - "limit has been exceeded."); - return false; - } - store_varying: info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name); @@ -1040,13 +1400,12 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, * feedback of arrays would be useless otherwise. 
*/ for (unsigned j = 0; j < i; ++j) { - if (!decls[j].is_varying()) - continue; - - if (tfeedback_decl::is_same(decls[i], decls[j])) { - linker_error(prog, "Transform feedback varying %s specified " - "more than once.", varying_names[i]); - return false; + if (decls[j].is_varying()) { + if (tfeedback_decl::is_same(decls[i], decls[j])) { + linker_error(prog, "Transform feedback varying %s specified " + "more than once.", varying_names[i]); + return false; + } } } } @@ -1076,7 +1435,8 @@ cmp_xfb_offset(const void * x_generic, const void * y_generic) static bool store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers) + tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers, + const void *mem_ctx) { if (!prog->last_vert_prog) return true; @@ -1097,9 +1457,10 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, * however some drivers expect to receive the list of transform feedback * declarations in order so sort it now for convenience. 
*/ - if (has_xfb_qualifiers) + if (has_xfb_qualifiers) { qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), cmp_xfb_offset); + } xfb_prog->sh.LinkedTransformFeedback->Varyings = rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, @@ -1117,6 +1478,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, unsigned num_buffers = 0; unsigned buffers = 0; + BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {}; if (!has_xfb_qualifiers && separate_attribs_mode) { /* GL_SEPARATE_ATTRIBS */ @@ -1124,7 +1486,8 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (!tfeedback_decls[i].store(ctx, prog, xfb_prog->sh.LinkedTransformFeedback, num_buffers, num_buffers, num_outputs, - NULL, has_xfb_qualifiers)) + used_components, NULL, + has_xfb_qualifiers, mem_ctx)) return false; buffers |= 1 << num_buffers; @@ -1142,7 +1505,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (has_xfb_qualifiers) { for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { if (prog->TransformFeedback.BufferStride[j]) { - buffers |= 1 << j; explicit_stride[j] = true; xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = prog->TransformFeedback.BufferStride[j] / 4; @@ -1162,15 +1524,30 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (!tfeedback_decls[i].store(ctx, prog, xfb_prog->sh.LinkedTransformFeedback, buffer, num_buffers, num_outputs, - explicit_stride, has_xfb_qualifiers)) + used_components, explicit_stride, + has_xfb_qualifiers, mem_ctx)) return false; num_buffers++; buffer_stream_id = -1; continue; - } else if (tfeedback_decls[i].is_varying()) { + } + + if (has_xfb_qualifiers) { + buffer = tfeedback_decls[i].get_buffer(); + } else { + buffer = num_buffers; + } + + if (tfeedback_decls[i].is_varying()) { if (buffer_stream_id == -1) { /* First varying writing to this buffer: remember its stream */ buffer_stream_id = (int) 
tfeedback_decls[i].get_stream_id(); + + /* Only mark a buffer as active when there is a varying + * attached to it. This behaviour is based on a revised version + * of section 13.2.2 of the GL 4.6 spec. + */ + buffers |= 1 << buffer; } else if (buffer_stream_id != (int) tfeedback_decls[i].get_stream_id()) { /* Varying writes to the same buffer from a different stream */ @@ -1186,17 +1563,11 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, } } - if (has_xfb_qualifiers) { - buffer = tfeedback_decls[i].get_buffer(); - } else { - buffer = num_buffers; - } - buffers |= 1 << buffer; - if (!tfeedback_decls[i].store(ctx, prog, xfb_prog->sh.LinkedTransformFeedback, buffer, num_buffers, num_outputs, - explicit_stride, has_xfb_qualifiers)) + used_components, explicit_stride, + has_xfb_qualifiers, mem_ctx)) return false; } } @@ -1217,18 +1588,19 @@ class varying_matches { public: varying_matches(bool disable_varying_packing, bool xfb_enabled, + bool enhanced_layouts_enabled, gl_shader_stage producer_stage, gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); unsigned assign_locations(struct gl_shader_program *prog, - uint8_t *components, + uint8_t components[], uint64_t reserved_slots); void store_locations() const; private: bool is_varying_packing_safe(const glsl_type *type, - const ir_variable *var); + const ir_variable *var) const; /** * If true, this driver disables varying packing, so all varyings need to @@ -1250,6 +1622,8 @@ private: */ const bool xfb_enabled; + const bool enhanced_layouts_enabled; + /** * Enum representing the order in which varyings are packed within a * packing class. 
@@ -1326,10 +1700,12 @@ private: varying_matches::varying_matches(bool disable_varying_packing, bool xfb_enabled, + bool enhanced_layouts_enabled, gl_shader_stage producer_stage, gl_shader_stage consumer_stage) : disable_varying_packing(disable_varying_packing), xfb_enabled(xfb_enabled), + enhanced_layouts_enabled(enhanced_layouts_enabled), producer_stage(producer_stage), consumer_stage(consumer_stage) { @@ -1357,14 +1733,14 @@ varying_matches::~varying_matches() */ bool varying_matches::is_varying_packing_safe(const glsl_type *type, - const ir_variable *var) + const ir_variable *var) const { if (consumer_stage == MESA_SHADER_TESS_EVAL || consumer_stage == MESA_SHADER_TESS_CTRL || producer_stage == MESA_SHADER_TESS_CTRL) return false; - return xfb_enabled && (type->is_array() || type->is_record() || + return xfb_enabled && (type->is_array() || type->is_struct() || type->is_matrix() || var->data.is_xfb_only); } @@ -1407,7 +1783,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) if (!disable_varying_packing && (needs_flat_qualifier || - (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT))) { + (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) { /* Since this varying is not being consumed by the fragment shader, its * interpolation type varying cannot possibly affect rendering. * Also, this variable is non-flat and is (or contains) an integer @@ -1461,17 +1837,24 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) ? 
consumer_stage : producer_stage; const glsl_type *type = get_varying_type(var, stage); + if (producer_var && consumer_var && + consumer_var->data.must_be_shader_input) { + producer_var->data.must_be_shader_input = 1; + } + this->matches[this->num_matches].packing_class = this->compute_packing_class(var); this->matches[this->num_matches].packing_order = this->compute_packing_order(var); - if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) { + if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) || + var->data.must_be_shader_input) { unsigned slots = type->count_attribute_slots(false); this->matches[this->num_matches].num_components = slots * 4; } else { this->matches[this->num_matches].num_components = type->component_slots(); } + this->matches[this->num_matches].producer_var = producer_var; this->matches[this->num_matches].consumer_var = consumer_var; this->num_matches++; @@ -1485,10 +1868,15 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) /** * Choose locations for all of the variable matches that were previously * passed to varying_matches::record(). + * \param components returns array[slot] of number of components used + * per slot (1, 2, 3 or 4) + * \param reserved_slots bitmask indicating which varying slots are already + * allocated + * \return number of slots (4-element vectors) allocated */ unsigned varying_matches::assign_locations(struct gl_shader_program *prog, - uint8_t *components, + uint8_t components[], uint64_t reserved_slots) { /* If packing has been disabled then we cannot safely sort the varyings by @@ -1513,13 +1901,24 @@ varying_matches::assign_locations(struct gl_shader_program *prog, unsigned generic_location = 0; unsigned generic_patch_location = MAX_VARYING*4; bool previous_var_xfb_only = false; + unsigned previous_packing_class = ~0u; + + /* For transform feedback separate mode, we know the number of attributes + * is <= the number of buffers.
So packing isn't critical. In fact, + * packing vec3 attributes can cause trouble because splitting a vec3 + * effectively creates an additional transform feedback output. The + * extra TFB output may exceed device driver limits. + */ + const bool dont_pack_vec3 = + (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && + prog->TransformFeedback.NumVarying > 0); for (unsigned i = 0; i < this->num_matches; i++) { unsigned *location = &generic_location; - const ir_variable *var; const glsl_type *type; bool is_vertex_input = false; + if (matches[i].consumer_var) { var = matches[i].consumer_var; type = get_varying_type(var, consumer_stage); @@ -1544,14 +1943,17 @@ varying_matches::assign_locations(struct gl_shader_program *prog, * we can pack varyings together that are only used for transform * feedback. */ - if ((this->disable_varying_packing && + if (var->data.must_be_shader_input || + (this->disable_varying_packing && !(previous_var_xfb_only && var->data.is_xfb_only)) || - (i > 0 && this->matches[i - 1].packing_class - != this->matches[i].packing_class )) { + (previous_packing_class != this->matches[i].packing_class) || + (this->matches[i].packing_order == PACKING_ORDER_VEC3 && + dont_pack_vec3)) { *location = ALIGN(*location, 4); } previous_var_xfb_only = var->data.is_xfb_only; + previous_packing_class = this->matches[i].packing_class; /* The number of components taken up by this variable. 
For vertex shader * inputs, we use the number of slots * 4, as they have different @@ -1575,13 +1977,13 @@ varying_matches::assign_locations(struct gl_shader_program *prog, const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); assert(slots > 0); - if (reserved_slots & slot_mask) { - *location = ALIGN(*location + 1, 4); - slot_end = *location + num_components - 1; - continue; + + if ((reserved_slots & slot_mask) == 0) { + break; } - break; + *location = ALIGN(*location + 1, 4); + slot_end = *location + num_components - 1; } if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { @@ -1614,6 +2016,12 @@ varying_matches::assign_locations(struct gl_shader_program *prog, void varying_matches::store_locations() const { + /* Check if location needs to be packed with lower_packed_varyings() or if + * we can just use ARB_enhanced_layouts packing. + */ + bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 }; + const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; + for (unsigned i = 0; i < this->num_matches; i++) { ir_variable *producer_var = this->matches[i].producer_var; ir_variable *consumer_var = this->matches[i].consumer_var; @@ -1631,6 +2039,64 @@ varying_matches::store_locations() const consumer_var->data.location = VARYING_SLOT_VAR0 + slot; consumer_var->data.location_frac = offset; } + + /* Find locations suitable for native packing via + * ARB_enhanced_layouts.
+ */ + if (producer_var && consumer_var) { + if (enhanced_layouts_enabled) { + const glsl_type *type = + get_varying_type(producer_var, producer_stage); + if (type->is_array() || type->is_matrix() || type->is_struct() || + type->is_double()) { + unsigned comp_slots = type->component_slots() + offset; + unsigned slots = comp_slots / 4; + if (comp_slots % 4) + slots += 1; + + for (unsigned j = 0; j < slots; j++) { + pack_loc[slot + j] = true; + } + } else if (offset + type->vector_elements > 4) { + pack_loc[slot] = true; + pack_loc[slot + 1] = true; + } else { + loc_type[slot][offset] = type; + } + } + } + } + + /* Attempt to use ARB_enhanced_layouts for more efficient packing if + * suitable. + */ + if (enhanced_layouts_enabled) { + for (unsigned i = 0; i < this->num_matches; i++) { + ir_variable *producer_var = this->matches[i].producer_var; + ir_variable *consumer_var = this->matches[i].consumer_var; + unsigned generic_location = this->matches[i].generic_location; + unsigned slot = generic_location / 4; + + if (pack_loc[slot] || !producer_var || !consumer_var) + continue; + + const glsl_type *type = + get_varying_type(producer_var, producer_stage); + bool type_match = true; + for (unsigned j = 0; j < 4; j++) { + if (loc_type[slot][j]) { + if (type->base_type != loc_type[slot][j]->base_type) + type_match = false; + } + } + + if (type_match) { + producer_var->data.explicit_location = 1; + consumer_var->data.explicit_location = 1; + producer_var->data.explicit_component = 1; + consumer_var->data.explicit_component = 1; + } + } } } @@ -1659,11 +2125,17 @@ varying_matches::compute_packing_class(const ir_variable *var) * * Therefore, the packing class depends only on the interpolation type. */ - unsigned packing_class = var->data.centroid | (var->data.sample << 1) | - (var->data.patch << 2); - packing_class *= 4; - packing_class += var->is_interpolation_flat() + const unsigned interp = var->is_interpolation_flat() ? 
unsigned(INTERP_MODE_FLAT) : var->data.interpolation; + + assert(interp < (1 << 3)); + + const unsigned packing_class = (interp << 0) | + (var->data.centroid << 3) | + (var->data.sample << 4) | + (var->data.patch << 5) | + (var->data.must_be_shader_input << 6); + return packing_class; } @@ -1720,7 +2192,7 @@ varying_matches::xfb_comparator(const void *x_generic, const void *y_generic) const match *x = (const match *) x_generic; if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) - return match_comparator(x_generic, y_generic); + return match_comparator(x_generic, y_generic); /* FIXME: When the comparator returns 0 it means the elements being * compared are equivalent. However the qsort documentation says: @@ -1773,9 +2245,11 @@ class tfeedback_candidate_generator : public program_resource_visitor { public: tfeedback_candidate_generator(void *mem_ctx, - hash_table *tfeedback_candidates) + hash_table *tfeedback_candidates, + gl_shader_stage stage) : mem_ctx(mem_ctx), tfeedback_candidates(tfeedback_candidates), + stage(stage), toplevel_var(NULL), varying_floats(0) { @@ -1785,10 +2259,17 @@ public: { /* All named varying interface blocks should be flattened by now */ assert(!var->is_interface_instance()); + assert(var->data.mode == ir_var_shader_out); this->toplevel_var = var; this->varying_floats = 0; - program_resource_visitor::process(var); + const glsl_type *t = + var->data.from_named_ifc_block ? 
var->get_interface_type() : var->type; + if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) { + assert(t->is_array()); + t = t->fields.array; + } + program_resource_visitor::process(var, t, false); } private: @@ -1798,7 +2279,7 @@ private: const enum glsl_interface_packing, bool /* last_field */) { - assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_struct()); assert(!type->without_array()->is_interface()); tfeedback_candidate *candidate @@ -1822,6 +2303,8 @@ private: */ hash_table * const tfeedback_candidates; + gl_shader_stage stage; + /** * Pointer to the toplevel variable that is being traversed. */ @@ -2017,7 +2500,8 @@ reserved_varying_slot(struct gl_linked_shader *stage, var_slot = var->data.location - VARYING_SLOT_VAR0; unsigned num_elements = get_varying_type(var, stage->Stage) - ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX); + ->count_attribute_slots(io_mode == ir_var_shader_in && + stage->Stage == MESA_SHADER_VERTEX); for (unsigned i = 0; i < num_elements; i++) { if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) slots |= UINT64_C(1) << var_slot; @@ -2091,8 +2575,9 @@ assign_varying_locations(struct gl_context *ctx, disable_varying_packing = true; varying_matches matches(disable_varying_packing, xfb_enabled, - producer ? producer->Stage : (gl_shader_stage)-1, - consumer ? consumer->Stage : (gl_shader_stage)-1); + ctx->Extensions.ARB_enhanced_layouts, + producer ? producer->Stage : MESA_SHADER_NONE, + consumer ? 
consumer->Stage : MESA_SHADER_NONE); hash_table *tfeedback_candidates = _mesa_hash_table_create(NULL, _mesa_key_hash_string, _mesa_key_string_equal); @@ -2150,8 +2635,28 @@ assign_varying_locations(struct gl_context *ctx, producer->Stage == MESA_SHADER_GEOMETRY)); if (num_tfeedback_decls > 0) { - tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates); - g.process(output_var); + tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage); + /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1 + * ("Vertex Shader Variables / Output Variables") + * + * "Each program object can specify a set of output variables from + * one shader to be recorded in transform feedback mode (see + * section 13.3). The variables that can be recorded are those + * emitted by the first active shader, in order, from the + * following list: + * + * * geometry shader + * * tessellation evaluation shader + * * tessellation control shader + * * vertex shader" + * + * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader + * Variables / Output Variables") tessellation control shader is + * not included in the stages list. 
+ */ + if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) { + g.process(output_var); + } } ir_variable *const input_var = @@ -2188,17 +2693,12 @@ assign_varying_locations(struct gl_context *ctx, */ foreach_in_list(ir_instruction, node, consumer->ir) { ir_variable *const input_var = node->as_variable(); - - if (input_var == NULL || input_var->data.mode != ir_var_shader_in) - continue; - - matches.record(NULL, input_var); + if (input_var && input_var->data.mode == ir_var_shader_in) { + matches.record(NULL, input_var); + } } } - _mesa_hash_table_destroy(consumer_inputs, NULL); - _mesa_hash_table_destroy(consumer_interface_inputs, NULL); - for (unsigned i = 0; i < num_tfeedback_decls; ++i) { if (!tfeedback_decls[i].is_varying()) continue; @@ -2211,24 +2711,45 @@ assign_varying_locations(struct gl_context *ctx, return false; } + /* Mark xfb varyings as always active */ + matched_candidate->toplevel_var->data.always_active_io = 1; + + /* Mark any corresponding inputs as always active also. We must do this + * because we have a NIR pass that lowers vectors to scalars and another + * that removes unused varyings. + * We don't split varyings marked as always active because there is no + * point in doing so. This means we need to mark both sides of the + * interface as always active otherwise we will have a mismatch and + * start removing things we shouldn't. 
+ */ + ir_variable *const input_var = + linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var, + consumer_inputs, + consumer_interface_inputs, + consumer_inputs_with_locations); + if (input_var) + input_var->data.always_active_io = 1; + if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { matched_candidate->toplevel_var->data.is_xfb_only = 1; matches.record(matched_candidate->toplevel_var, NULL); } } + _mesa_hash_table_destroy(consumer_inputs, NULL); + _mesa_hash_table_destroy(consumer_interface_inputs, NULL); + uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; const unsigned slots_used = matches.assign_locations( prog, components, reserved_slots); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { - if (!tfeedback_decls[i].is_varying()) - continue; - - if (!tfeedback_decls[i].assign_location(ctx, prog)) { - _mesa_hash_table_destroy(tfeedback_candidates, NULL); - return false; + if (tfeedback_decls[i].is_varying()) { + if (!tfeedback_decls[i].assign_location(ctx, prog)) { + _mesa_hash_table_destroy(tfeedback_candidates, NULL); + return false; + } } } _mesa_hash_table_destroy(tfeedback_candidates, NULL); @@ -2293,7 +2814,7 @@ assign_varying_locations(struct gl_context *ctx, return true; } -bool +static bool check_against_output_limit(struct gl_context *ctx, struct gl_shader_program *prog, gl_linked_shader *producer, @@ -2337,7 +2858,7 @@ check_against_output_limit(struct gl_context *ctx, return true; } -bool +static bool check_against_input_limit(struct gl_context *ctx, struct gl_shader_program *prog, gl_linked_shader *consumer, @@ -2516,13 +3037,13 @@ link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, /* This must be done after all dead varyings are eliminated. 
*/ if (sh_i != NULL) { - unsigned slots_used = _mesa_bitcount_64(reserved_out_slots); + unsigned slots_used = util_bitcount64(reserved_out_slots); if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) { return false; } } - unsigned slots_used = _mesa_bitcount_64(reserved_in_slots); + unsigned slots_used = util_bitcount64(reserved_in_slots); if (!check_against_input_limit(ctx, prog, sh_next, slots_used)) return false; @@ -2532,7 +3053,7 @@ link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, } if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls, - has_xfb_qualifiers)) + has_xfb_qualifiers, mem_ctx)) return false; return true;