X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fglsl%2Flink_varyings.cpp;h=1fdfcb877deb9e608cb634a29c3e9543218cc108;hb=4925347ec5d469574f1280e972940cfea345ed6e;hp=99fb3fcdc004f1f04e0ac4e256db9f7681fea878;hpb=35616a9e0ef0511ebb77e7076c00f2eeb248933a;p=mesa.git diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 99fb3fcdc00..1fdfcb877de 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -36,7 +36,7 @@ #include "linker.h" #include "link_varyings.h" #include "main/macros.h" -#include "program/hash_table.h" +#include "util/hash_table.h" #include "program.h" @@ -106,8 +106,9 @@ create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, } } -bool -process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh, +static bool +process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, + struct gl_shader_program *prog, unsigned *num_tfeedback_decls, char ***varying_names) { @@ -118,8 +119,9 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh, * xfb_stride to interface block members so this will catch that case also. */ for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { - if (sh->TransformFeedback.BufferStride[j]) { + if (prog->TransformFeedback.BufferStride[j]) { has_xfb_qualifiers = true; + break; } } @@ -163,10 +165,12 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh, if (var->data.from_named_ifc_block) { type = var->get_interface_type(); + /* Find the member type before it was altered by lowering */ + const glsl_type *type_wa = type->without_array(); member_type = - type->fields.structure[type->field_index(var->name)].type; - name = ralloc_strdup(NULL, type->without_array()->name); + type_wa->fields.structure[type_wa->field_index(var->name)].type; + name = ralloc_strdup(NULL, type_wa->name); } else { type = var->type; member_type = NULL; @@ -182,31 +186,13 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh, return has_xfb_qualifiers; } -static bool -anonymous_struct_type_matches(const glsl_type *output_type, - const glsl_type *to_match) -{ - while (output_type->is_array() && to_match->is_array()) { - /* if the lengths at each level don't match fail. */ - if (output_type->length != to_match->length) - return false; - output_type = output_type->fields.array; - to_match = to_match->fields.array; - } - - if (output_type->is_array() || to_match->is_array()) - return false; - return output_type->is_anonymous() && - to_match->is_anonymous() && - to_match->record_compare(output_type); -} - /** * Validate the types and qualifiers of an output from one stage against the * matching input to another stage. */ static void -cross_validate_types_and_qualifiers(struct gl_shader_program *prog, +cross_validate_types_and_qualifiers(struct gl_context *ctx, + struct gl_shader_program *prog, const ir_variable *input, const ir_variable *output, gl_shader_stage consumer_stage, @@ -245,19 +231,15 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * fragment language." 
*/ if (!output->type->is_array() || !is_gl_identifier(output->name)) { - bool anon_matches = anonymous_struct_type_matches(output->type, type_to_match); - - if (!anon_matches) { - linker_error(prog, - "%s shader output `%s' declared as type `%s', " - "but %s shader input declared as type `%s'\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - output->type->name, - _mesa_shader_stage_to_string(consumer_stage), - input->type->name); - return; - } + linker_error(prog, + "%s shader output `%s' declared as type `%s', " + "but %s shader input declared as type `%s'\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + output->type->name, + _mesa_shader_stage_to_string(consumer_stage), + input->type->name); + return; } } @@ -271,7 +253,7 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * OpenGLES 3.0 drivers, so we relax the checking in all cases. */ if (false /* always skip the centroid check */ && - prog->Version < (prog->IsES ? 310 : 430) && + prog->data->Version < (prog->IsES ? 310 : 430) && input->data.centroid != output->data.centroid) { linker_error(prog, "%s shader output `%s' %s centroid qualifier, " @@ -308,7 +290,25 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, return; } - if (!prog->IsES && input->data.invariant != output->data.invariant) { + /* The GLSL 4.30 and GLSL ES 3.00 specifications say: + * + * "As only outputs need be declared with invariant, an output from + * one shader stage will still match an input of a subsequent stage + * without the input being declared as invariant." + * + * while GLSL 4.20 says: + * + * "For variables leaving one shader and coming into another shader, + * the invariant keyword has to be used in both shaders, or a link + * error will result." + * + * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: + * + * "The invariance of varyings that are declared in both the vertex + * and fragment shaders must match." + */ + if (input->data.invariant != output->data.invariant && + prog->data->Version < (prog->IsES ? 300 : 430)) { linker_error(prog, "%s shader output `%s' %s invariant qualifier, " "but %s shader input %s invariant qualifier\n", @@ -328,20 +328,48 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * "It is a link-time error if, within the same stage, the interpolation * qualifiers of variables of the same name do not match. * + * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: + * + * "When no interpolation qualifier is present, smooth interpolation + * is used." + * + * So we match variables where one is smooth and the other has no explicit + * qualifier. 
*/ - if (input->data.interpolation != output->data.interpolation && - prog->Version < 440) { - linker_error(prog, - "%s shader output `%s' specifies %s " - "interpolation qualifier, " - "but %s shader input specifies %s " - "interpolation qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - interpolation_string(output->data.interpolation), - _mesa_shader_stage_to_string(consumer_stage), - interpolation_string(input->data.interpolation)); - return; + unsigned input_interpolation = input->data.interpolation; + unsigned output_interpolation = output->data.interpolation; + if (prog->IsES) { + if (input_interpolation == INTERP_MODE_NONE) + input_interpolation = INTERP_MODE_SMOOTH; + if (output_interpolation == INTERP_MODE_NONE) + output_interpolation = INTERP_MODE_SMOOTH; + } + if (input_interpolation != output_interpolation && + prog->data->Version < 440) { + if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) { + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + return; + } else { + linker_warning(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + } } } @@ -349,7 +377,8 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, * Validate front and back color outputs against single color input */ static void -cross_validate_front_and_back_color(struct gl_shader_program *prog, +cross_validate_front_and_back_color(struct gl_context *ctx, + struct gl_shader_program *prog, const ir_variable *input, const ir_variable *front_color, const ir_variable *back_color, @@ -357,24 +386,301 @@ cross_validate_front_and_back_color(struct gl_shader_program *prog, gl_shader_stage producer_stage) { if (front_color != NULL && front_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, front_color, + cross_validate_types_and_qualifiers(ctx, prog, input, front_color, consumer_stage, producer_stage); if (back_color != NULL && back_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, back_color, + cross_validate_types_and_qualifiers(ctx, prog, input, back_color, consumer_stage, producer_stage); } +static unsigned +compute_variable_location_slot(ir_variable *var, gl_shader_stage stage) +{ + unsigned location_start = VARYING_SLOT_VAR0; + + switch (stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_in) + location_start = VERT_ATTRIB_GENERIC0; + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + if (var->data.patch) + location_start = VARYING_SLOT_PATCH0; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_out) + location_start = FRAG_RESULT_DATA0; + break; + default: + break; + } + + return var->data.location - location_start; +} + +struct explicit_location_info { + ir_variable *var; + unsigned numerical_type; + unsigned interpolation; + bool centroid; + bool sample; + bool patch; +}; + +static inline unsigned +get_numerical_type(const glsl_type 
*type) +{ + /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68, + * (Location aliasing): + * + * "Further, when location aliasing, the aliases sharing the location + * must have the same underlying numerical type (floating-point or + * integer) + */ + if (type->is_float() || type->is_double()) + return GLSL_TYPE_FLOAT; + return GLSL_TYPE_INT; +} + +static bool +check_location_aliasing(struct explicit_location_info explicit_locations[][4], + ir_variable *var, + unsigned location, + unsigned component, + unsigned location_limit, + const glsl_type *type, + unsigned interpolation, + bool centroid, + bool sample, + bool patch, + gl_shader_program *prog, + gl_shader_stage stage) +{ + unsigned last_comp; + if (type->without_array()->is_record()) { + /* The component qualifier can't be used on structs so just treat + * all component slots as used. + */ + last_comp = 4; + } else { + unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; + last_comp = component + type->without_array()->vector_elements * dmul; + } + + while (location < location_limit) { + unsigned comp = 0; + while (comp < 4) { + struct explicit_location_info *info = + &explicit_locations[location][comp]; + + if (info->var) { + /* Component aliasing is not alloed */ + if (comp >= component && comp < last_comp) { + linker_error(prog, + "%s shader has multiple outputs explicitly " + "assigned to location %d and component %d\n", + _mesa_shader_stage_to_string(stage), + location, comp); + return false; + } else { + /* For all other used components we need to have matching + * types, interpolation and auxiliary storage + */ + if (info->numerical_type != + get_numerical_type(type->without_array())) { + linker_error(prog, + "Varyings sharing the same location must " + "have the same underlying numerical type. " + "Location %u component %u\n", + location, comp); + return false; + } + + if (info->interpolation != interpolation) { + linker_error(prog, + "%s shader has multiple outputs at explicit " + "location %u with different interpolation " + "settings\n", + _mesa_shader_stage_to_string(stage), location); + return false; + } + + if (info->centroid != centroid || + info->sample != sample || + info->patch != patch) { + linker_error(prog, + "%s shader has multiple outputs at explicit " + "location %u with different aux storage\n", + _mesa_shader_stage_to_string(stage), location); + return false; + } + } + } else if (comp >= component && comp < last_comp) { + info->var = var; + info->numerical_type = get_numerical_type(type->without_array()); + info->interpolation = interpolation; + info->centroid = centroid; + info->sample = sample; + info->patch = patch; + } + + comp++; + + /* We need to do some special handling for doubles as dvec3 and + * dvec4 consume two consecutive locations. We don't need to + * worry about components beginning at anything other than 0 as + * the spec does not allow this for dvec3 and dvec4. 
+ */ + if (comp == 4 && last_comp > 4) { + last_comp = last_comp - 4; + /* Bump location index and reset the component index */ + location++; + comp = 0; + component = 0; + } + } + + location++; + } + + return true; +} + +static bool +validate_explicit_variable_location(struct gl_context *ctx, + struct explicit_location_info explicit_locations[][4], + ir_variable *var, + gl_shader_program *prog, + gl_linked_shader *sh) +{ + const glsl_type *type = get_varying_type(var, sh->Stage); + unsigned num_elements = type->count_attribute_slots(false); + unsigned idx = compute_variable_location_slot(var, sh->Stage); + unsigned slot_limit = idx + num_elements; + + /* Vertex shader inputs and fragment shader outputs are validated in + * assign_attribute_or_color_locations() so we should not attempt to + * validate them again here. + */ + unsigned slot_max; + if (var->data.mode == ir_var_shader_out) { + assert(sh->Stage != MESA_SHADER_FRAGMENT); + slot_max = + ctx->Const.Program[sh->Stage].MaxOutputComponents / 4; + } else { + assert(var->data.mode == ir_var_shader_in); + assert(sh->Stage != MESA_SHADER_VERTEX); + slot_max = + ctx->Const.Program[sh->Stage].MaxInputComponents / 4; + } + + if (slot_limit > slot_max) { + linker_error(prog, + "Invalid location %u in %s shader\n", + idx, _mesa_shader_stage_to_string(sh->Stage)); + return false; + } + + const glsl_type *type_without_array = type->without_array(); + if (type_without_array->is_interface()) { + for (unsigned i = 0; i < type_without_array->length; i++) { + glsl_struct_field *field = &type_without_array->fields.structure[i]; + unsigned field_location = field->location - + (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0); + if (!check_location_aliasing(explicit_locations, var, + field_location, + 0, field_location + 1, + field->type, + field->interpolation, + field->centroid, + field->sample, + field->patch, + prog, sh->Stage)) { + return false; + } + } + } else if (!check_location_aliasing(explicit_locations, var, + idx, var->data.location_frac, + slot_limit, type, + var->data.interpolation, + var->data.centroid, + var->data.sample, + var->data.patch, + prog, sh->Stage)) { + return false; + } + + return true; +} + +/** + * Validate explicit locations for the inputs to the first stage and the + * outputs of the last stage in an SSO program (everything in between is + * validated in cross_validate_outputs_to_inputs). 
+ */ +void +validate_sso_explicit_locations(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader_stage first_stage, + gl_shader_stage last_stage) +{ + assert(prog->SeparateShader); + + /* VS inputs and FS outputs are validated in + * assign_attribute_or_color_locations() + */ + bool validate_first_stage = first_stage != MESA_SHADER_VERTEX; + bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT; + if (!validate_first_stage && !validate_last_stage) + return; + + struct explicit_location_info explicit_locations[MAX_VARYING][4]; + + gl_shader_stage stages[2] = { first_stage, last_stage }; + bool validate_stage[2] = { validate_first_stage, validate_last_stage }; + ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out }; + + for (unsigned i = 0; i < 2; i++) { + if (!validate_stage[i]) + continue; + + gl_shader_stage stage = stages[i]; + + gl_linked_shader *sh = prog->_LinkedShaders[stage]; + assert(sh); + + memset(explicit_locations, 0, sizeof(explicit_locations)); + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || + !var->data.explicit_location || + var->data.location < VARYING_SLOT_VAR0 || + var->data.mode != var_direction[i]) + continue; + + if (!validate_explicit_variable_location( + ctx, explicit_locations, var, prog, sh)) { + return; + } + } + } +} + /** * Validate that outputs from one stage match inputs of another */ void -cross_validate_outputs_to_inputs(struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer) +cross_validate_outputs_to_inputs(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_linked_shader *producer, + gl_linked_shader *consumer) { glsl_symbol_table parameters; - ir_variable *explicit_locations[MAX_VARYINGS_INCL_PATCH][4] = - { {NULL, NULL} }; + struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 }; /* Find all shader outputs in the "producer" stage. */ @@ -391,66 +697,10 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, /* User-defined varyings with explicit locations are handled * differently because they do not need to have matching names. */ - const glsl_type *type = get_varying_type(var, producer->Stage); - unsigned num_elements = type->count_attribute_slots(false); - unsigned idx = var->data.location - VARYING_SLOT_VAR0; - unsigned slot_limit = idx + num_elements; - unsigned last_comp; - - if (var->type->without_array()->is_record()) { - /* The component qualifier can't be used on structs so just treat - * all component slots as used. - */ - last_comp = 4; - } else { - unsigned dmul = var->type->is_64bit() ? 2 : 1; - last_comp = var->data.location_frac + - var->type->without_array()->vector_elements * dmul; - } - - while (idx < slot_limit) { - unsigned i = var->data.location_frac; - while (i < last_comp) { - if (explicit_locations[idx][i] != NULL) { - linker_error(prog, - "%s shader has multiple outputs explicitly " - "assigned to location %d and component %d\n", - _mesa_shader_stage_to_string(producer->Stage), - idx, var->data.location_frac); - return; - } - - /* Make sure all component at this location have the same type. - */ - for (unsigned j = 0; j < 4; j++) { - if (explicit_locations[idx][j] && - (explicit_locations[idx][j]->type->without_array() - ->base_type != var->type->without_array()->base_type)) { - linker_error(prog, - "Varyings sharing the same location must " - "have the same underlying numerical type. 
" - "Location %u component %u\n", idx, - var->data.location_frac); - return; - } - } - - explicit_locations[idx][i] = var; - i++; - - /* We need to do some special handling for doubles as dvec3 and - * dvec4 consume two consecutive locations. We don't need to - * worry about components beginning at anything other than 0 as - * the spec does not allow this for dvec3 and dvec4. - */ - if (i == 3 && last_comp > 4) { - last_comp = last_comp - 4; - /* Bump location index and reset the component index */ - idx++; - i = 0; - } - } - idx++; + if (!validate_explicit_variable_location(ctx, + explicit_locations, + var, prog, producer)) { + return; } } } @@ -477,7 +727,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const ir_variable *const back_color = parameters.get_variable("gl_BackColor"); - cross_validate_front_and_back_color(prog, input, + cross_validate_front_and_back_color(ctx, prog, input, front_color, back_color, consumer->Stage, producer->Stage); } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { @@ -487,7 +737,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const ir_variable *const back_color = parameters.get_variable("gl_BackSecondaryColor"); - cross_validate_front_and_back_color(prog, input, + cross_validate_front_and_back_color(ctx, prog, input, front_color, back_color, consumer->Stage, producer->Stage); } else { @@ -502,11 +752,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, const glsl_type *type = get_varying_type(input, consumer->Stage); unsigned num_elements = type->count_attribute_slots(false); - unsigned idx = input->data.location - VARYING_SLOT_VAR0; + unsigned idx = + compute_variable_location_slot(input, consumer->Stage); unsigned slot_limit = idx + num_elements; while (idx < slot_limit) { - output = explicit_locations[idx][input->data.location_frac]; + if (idx >= MAX_VARYING) { + linker_error(prog, + "Invalid location %u in %s shader\n", idx, + _mesa_shader_stage_to_string(consumer->Stage)); + return; + } + + output = explicit_locations[idx][input->data.location_frac].var; if (output == NULL || input->data.location != output->data.location) { @@ -529,7 +787,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ if (!(input->get_interface_type() && output->get_interface_type())) - cross_validate_types_and_qualifiers(prog, input, output, + cross_validate_types_and_qualifiers(ctx, prog, input, output, consumer->Stage, producer->Stage); } else { @@ -554,9 +812,9 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, * Demote shader inputs and outputs that are not used in other stages, and * remove them via dead code elimination. */ -void +static void remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, - gl_shader *sh, + gl_linked_shader *sh, enum ir_variable_mode mode) { if (is_separate_shader_object) @@ -574,6 +832,11 @@ remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, */ if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) { assert(var->data.mode != ir_var_temporary); + + /* Assign zeros to demoted inputs to allow more optimizations. 
*/ + if (var->data.mode == ir_var_shader_in && !var->constant_value) + var->constant_value = ir_constant::zero(var, var->type); + var->data.mode = ir_var_auto; } } @@ -719,10 +982,12 @@ tfeedback_decl::assign_location(struct gl_context *ctx, unsigned actual_array_size; switch (this->lowered_builtin_array_variable) { case clip_distance: - actual_array_size = prog->LastClipDistanceArraySize; + actual_array_size = prog->last_vert_prog ? + prog->last_vert_prog->info.clip_distance_array_size : 0; break; case cull_distance: - actual_array_size = prog->LastCullDistanceArraySize; + actual_array_size = prog->last_vert_prog ? + prog->last_vert_prog->info.cull_distance_array_size : 0; break; case tess_level_outer: actual_array_size = 4; @@ -886,7 +1151,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, } if (explicit_stride && explicit_stride[buffer]) { - if (this->is_double() && info->Buffers[buffer].Stride % 2) { + if (this->is_64bit() && info->Buffers[buffer].Stride % 2) { linker_error(prog, "invalid qualifier xfb_stride=%d must be a " "multiple of 8 as its applied to a type that is or " "contains a double.", @@ -962,8 +1227,11 @@ tfeedback_decl::find_candidate(gl_shader_program *prog, name = "gl_TessLevelInnerMESA"; break; } - this->matched_candidate = (const tfeedback_candidate *) - hash_table_find(tfeedback_candidates, name); + hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name); + + this->matched_candidate = entry ? + (const tfeedback_candidate *) entry->data : NULL; + if (!this->matched_candidate) { /* From GL_EXT_transform_feedback: * A program will fail to link if: @@ -975,6 +1243,7 @@ tfeedback_decl::find_candidate(gl_shader_program *prog, linker_error(prog, "Transform feedback varying %s undeclared.", this->orig_name); } + return this->matched_candidate; } @@ -986,7 +1255,7 @@ tfeedback_decl::find_candidate(gl_shader_program *prog, * If an error occurs, the error is reported through linker_error() and false * is returned. */ -bool +static bool parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, const void *mem_ctx, unsigned num_names, char **varying_names, tfeedback_decl *decls) @@ -1008,13 +1277,12 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, * feedback of arrays would be useless otherwise. */ for (unsigned j = 0; j < i; ++j) { - if (!decls[j].is_varying()) - continue; - - if (tfeedback_decl::is_same(decls[i], decls[j])) { - linker_error(prog, "Transform feedback varying %s specified " - "more than once.", varying_names[i]); - return false; + if (decls[j].is_varying()) { + if (tfeedback_decl::is_same(decls[i], decls[j])) { + linker_error(prog, "Transform feedback varying %s specified " + "more than once.", varying_names[i]); + return false; + } } } } @@ -1035,16 +1303,20 @@ cmp_xfb_offset(const void * x_generic, const void * y_generic) /** * Store transform feedback location assignments into - * prog->LinkedTransformFeedback based on the data stored in tfeedback_decls. + * prog->sh.LinkedTransformFeedback based on the data stored in + * tfeedback_decls. * * If an error occurs, the error is reported through linker_error() and false * is returned. 
*/ -bool +static bool store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, unsigned num_tfeedback_decls, tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers) { + if (!prog->last_vert_prog) + return true; + /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for * tracking the number of buffers doesn't overflow. */ @@ -1053,23 +1325,21 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, bool separate_attribs_mode = prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; - ralloc_free(prog->LinkedTransformFeedback.Varyings); - ralloc_free(prog->LinkedTransformFeedback.Outputs); - - memset(&prog->LinkedTransformFeedback, 0, - sizeof(prog->LinkedTransformFeedback)); + struct gl_program *xfb_prog = prog->last_vert_prog; + xfb_prog->sh.LinkedTransformFeedback = + rzalloc(xfb_prog, struct gl_transform_feedback_info); /* The xfb_offset qualifier does not have to be used in increasing order * however some drivers expect to receive the list of transform feedback * declarations in order so sort it now for convenience. */ - if (has_xfb_qualifiers) + if (has_xfb_qualifiers) { qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), cmp_xfb_offset); + } - prog->LinkedTransformFeedback.Varyings = - rzalloc_array(prog, - struct gl_transform_feedback_varying_info, + xfb_prog->sh.LinkedTransformFeedback->Varyings = + rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, num_tfeedback_decls); unsigned num_outputs = 0; @@ -1078,9 +1348,8 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, num_outputs += tfeedback_decls[i].get_num_outputs(); } - prog->LinkedTransformFeedback.Outputs = - rzalloc_array(prog, - struct gl_transform_feedback_output, + xfb_prog->sh.LinkedTransformFeedback->Outputs = + rzalloc_array(xfb_prog, struct gl_transform_feedback_output, num_outputs); unsigned num_buffers = 0; @@ -1089,7 +1358,8 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (!has_xfb_qualifiers && separate_attribs_mode) { /* GL_SEPARATE_ATTRIBS */ for (unsigned i = 0; i < num_tfeedback_decls; ++i) { - if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, + if (!tfeedback_decls[i].store(ctx, prog, + xfb_prog->sh.LinkedTransformFeedback, num_buffers, num_buffers, num_outputs, NULL, has_xfb_qualifiers)) return false; @@ -1109,9 +1379,8 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (has_xfb_qualifiers) { for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { if (prog->TransformFeedback.BufferStride[j]) { - buffers |= 1 << j; explicit_stride[j] = true; - prog->LinkedTransformFeedback.Buffers[j].Stride = + xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = prog->TransformFeedback.BufferStride[j] / 4; } } @@ -1127,17 +1396,31 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, if (tfeedback_decls[i].is_next_buffer_separator()) { if (!tfeedback_decls[i].store(ctx, prog, - &prog->LinkedTransformFeedback, + xfb_prog->sh.LinkedTransformFeedback, buffer, num_buffers, num_outputs, explicit_stride, has_xfb_qualifiers)) return false; num_buffers++; buffer_stream_id = -1; continue; - } else if (tfeedback_decls[i].is_varying()) { + } + + if (has_xfb_qualifiers) { + buffer = tfeedback_decls[i].get_buffer(); + } else { + buffer = num_buffers; + } + + if (tfeedback_decls[i].is_varying()) { if (buffer_stream_id == -1) { /* First varying writing to this buffer: remember its stream */ 
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); + + /* Only mark a buffer as active when there is a varying + * attached to it. This behaviour is based on a revised version + * of section 13.2.2 of the GL 4.6 spec. + */ + buffers |= 1 << buffer; } else if (buffer_stream_id != (int) tfeedback_decls[i].get_stream_id()) { /* Varying writes to the same buffer from a different stream */ @@ -1153,24 +1436,17 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, } } - if (has_xfb_qualifiers) { - buffer = tfeedback_decls[i].get_buffer(); - } else { - buffer = num_buffers; - } - buffers |= 1 << buffer; - if (!tfeedback_decls[i].store(ctx, prog, - &prog->LinkedTransformFeedback, + xfb_prog->sh.LinkedTransformFeedback, buffer, num_buffers, num_outputs, explicit_stride, has_xfb_qualifiers)) return false; } } - assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs); + assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs); - prog->LinkedTransformFeedback.ActiveBuffers = buffers; + xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; return true; } @@ -1184,17 +1460,19 @@ class varying_matches { public: varying_matches(bool disable_varying_packing, bool xfb_enabled, + bool enhanced_layouts_enabled, gl_shader_stage producer_stage, gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); unsigned assign_locations(struct gl_shader_program *prog, + uint8_t components[], uint64_t reserved_slots); void store_locations() const; private: bool is_varying_packing_safe(const glsl_type *type, - const ir_variable *var); + const ir_variable *var) const; /** * If true, this driver disables varying packing, so all varyings need to @@ -1216,6 +1494,8 @@ private: */ const bool xfb_enabled; + const bool enhanced_layouts_enabled; + /** * Enum representing the order in which varyings are packed within a * packing class. @@ -1292,10 +1572,12 @@ private: varying_matches::varying_matches(bool disable_varying_packing, bool xfb_enabled, + bool enhanced_layouts_enabled, gl_shader_stage producer_stage, gl_shader_stage consumer_stage) : disable_varying_packing(disable_varying_packing), xfb_enabled(xfb_enabled), + enhanced_layouts_enabled(enhanced_layouts_enabled), producer_stage(producer_stage), consumer_stage(consumer_stage) { @@ -1323,7 +1605,7 @@ varying_matches::~varying_matches() */ bool varying_matches::is_varying_packing_safe(const glsl_type *type, - const ir_variable *var) + const ir_variable *var) const { if (consumer_stage == MESA_SHADER_TESS_EVAL || consumer_stage == MESA_SHADER_TESS_CTRL || @@ -1371,8 +1653,9 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) (producer_var->type->contains_integer() || producer_var->type->contains_double()); - if (needs_flat_qualifier || - (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) { + if (!disable_varying_packing && + (needs_flat_qualifier || + (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) { /* Since this varying is not being consumed by the fragment shader, its * interpolation type varying cannot possibly affect rendering. 
* Also, this variable is non-flat and is (or contains) an integer @@ -1387,13 +1670,13 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) if (producer_var) { producer_var->data.centroid = false; producer_var->data.sample = false; - producer_var->data.interpolation = INTERP_QUALIFIER_FLAT; + producer_var->data.interpolation = INTERP_MODE_FLAT; } if (consumer_var) { consumer_var->data.centroid = false; consumer_var->data.sample = false; - consumer_var->data.interpolation = INTERP_QUALIFIER_FLAT; + consumer_var->data.interpolation = INTERP_MODE_FLAT; } } @@ -1404,23 +1687,46 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) sizeof(*this->matches) * this->matches_capacity); } - const ir_variable *const var = (producer_var != NULL) - ? producer_var : consumer_var; - const gl_shader_stage stage = (producer_var != NULL) - ? producer_stage : consumer_stage; + /* We must use the consumer to compute the packing class because in GL4.4+ + * there is no guarantee interpolation qualifiers will match across stages. + * + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: + * + * "The type and presence of interpolation qualifiers of variables with + * the same name declared in all linked shaders for the same cross-stage + * interface must match, otherwise the link command will fail. + * + * When comparing an output from one stage to an input of a subsequent + * stage, the input and output don't match if their interpolation + * qualifiers (or lack thereof) are not the same." + * + * This text was also in at least revison 7 of the 4.40 spec but is no + * longer in revision 9 and not in the 4.50 spec. + */ + const ir_variable *const var = (consumer_var != NULL) + ? consumer_var : producer_var; + const gl_shader_stage stage = (consumer_var != NULL) + ? consumer_stage : producer_stage; const glsl_type *type = get_varying_type(var, stage); + if (producer_var && consumer_var && + consumer_var->data.must_be_shader_input) { + producer_var->data.must_be_shader_input = 1; + } + this->matches[this->num_matches].packing_class = this->compute_packing_class(var); this->matches[this->num_matches].packing_order = this->compute_packing_order(var); - if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) { + if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) || + var->data.must_be_shader_input) { unsigned slots = type->count_attribute_slots(false); this->matches[this->num_matches].num_components = slots * 4; } else { this->matches[this->num_matches].num_components = type->component_slots(); } + this->matches[this->num_matches].producer_var = producer_var; this->matches[this->num_matches].consumer_var = consumer_var; this->num_matches++; @@ -1434,9 +1740,15 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) /** * Choose locations for all of the variable matches that were previously * passed to varying_matches::record(). 
+ * \param components returns array[slot] of number of components used + * per slot (1, 2, 3 or 4) + * \param reserved_slots bitmask indicating which varying slots are already + * allocated + * \return number of slots (4-element vectors) allocated */ unsigned varying_matches::assign_locations(struct gl_shader_program *prog, + uint8_t components[], uint64_t reserved_slots) { /* If packing has been disabled then we cannot safely sort the varyings by @@ -1461,13 +1773,24 @@ varying_matches::assign_locations(struct gl_shader_program *prog, unsigned generic_location = 0; unsigned generic_patch_location = MAX_VARYING*4; bool previous_var_xfb_only = false; + unsigned previous_packing_class = ~0u; + + /* For tranform feedback separate mode, we know the number of attributes + * is <= the number of buffers. So packing isn't critical. In fact, + * packing vec3 attributes can cause trouble because splitting a vec3 + * effectively creates an additional transform feedback output. The + * extra TFB output may exceed device driver limits. + */ + const bool dont_pack_vec3 = + (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && + prog->TransformFeedback.NumVarying > 0); for (unsigned i = 0; i < this->num_matches; i++) { unsigned *location = &generic_location; - const ir_variable *var; const glsl_type *type; bool is_vertex_input = false; + if (matches[i].consumer_var) { var = matches[i].consumer_var; type = get_varying_type(var, consumer_stage); @@ -1492,23 +1815,28 @@ varying_matches::assign_locations(struct gl_shader_program *prog, * we can pack varyings together that are only used for transform * feedback. */ - if ((this->disable_varying_packing && + if (var->data.must_be_shader_input || + (this->disable_varying_packing && !(previous_var_xfb_only && var->data.is_xfb_only)) || - (i > 0 && this->matches[i - 1].packing_class - != this->matches[i].packing_class )) { + (previous_packing_class != this->matches[i].packing_class) || + (this->matches[i].packing_order == PACKING_ORDER_VEC3 && + dont_pack_vec3)) { *location = ALIGN(*location, 4); } previous_var_xfb_only = var->data.is_xfb_only; + previous_packing_class = this->matches[i].packing_class; - unsigned num_elements = type->count_attribute_slots(is_vertex_input); - unsigned slot_end; - if (this->disable_varying_packing && - !is_varying_packing_safe(type, var)) - slot_end = 4; - else - slot_end = type->without_array()->vector_elements; - slot_end += *location - 1; + /* The number of components taken up by this variable. For vertex shader + * inputs, we use the number of slots * 4, as they have different + * counting rules. + */ + unsigned num_components = is_vertex_input ? + type->count_attribute_slots(is_vertex_input) * 4 : + this->matches[i].num_components; + + /* The last slot for this variable, inclusive. */ + unsigned slot_end = *location + num_components - 1; /* FIXME: We could be smarter in the below code and loop back over * trying to fill any locations that we skipped because we couldn't pack @@ -1516,29 +1844,21 @@ varying_matches::assign_locations(struct gl_shader_program *prog, * hit the linking error if we run out of room and suggest they use * explicit locations. 
*/ - for (unsigned j = 0; j < num_elements; j++) { - while ((slot_end < MAX_VARYING * 4u) && - ((reserved_slots & (UINT64_C(1) << *location / 4u) || - (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) { + while (slot_end < MAX_VARYING * 4u) { + const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; + const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); - *location = ALIGN(*location + 1, 4); - slot_end = *location; + assert(slots > 0); - /* reset the counter and try again */ - j = 0; + if ((reserved_slots & slot_mask) == 0) { + break; } - /* Increase the slot to make sure there is enough room for next - * array element. - */ - if (this->disable_varying_packing && - !is_varying_packing_safe(type, var)) - slot_end += 4; - else - slot_end += type->without_array()->vector_elements; + *location = ALIGN(*location + 1, 4); + slot_end = *location + num_components - 1; } - if (!var->data.patch && *location >= MAX_VARYING * 4u) { + if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { linker_error(prog, "insufficient contiguous locations available for " "%s it is possible an array or struct could not be " "packed between varyings with explicit locations. Try " @@ -1546,9 +1866,15 @@ varying_matches::assign_locations(struct gl_shader_program *prog, var->name); } + if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { + for (unsigned j = *location / 4u; j < slot_end / 4u; j++) + components[j] = 4; + components[slot_end / 4u] = (slot_end & 3) + 1; + } + this->matches[i].generic_location = *location; - *location += this->matches[i].num_components; + *location = slot_end + 1; } return (generic_location + 3) / 4; @@ -1562,6 +1888,12 @@ varying_matches::assign_locations(struct gl_shader_program *prog, void varying_matches::store_locations() const { + /* Check is location needs to be packed with lower_packed_varyings() or if + * we can just use ARB_enhanced_layouts packing. + */ + bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 }; + const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; + for (unsigned i = 0; i < this->num_matches; i++) { ir_variable *producer_var = this->matches[i].producer_var; ir_variable *consumer_var = this->matches[i].consumer_var; @@ -1579,6 +1911,64 @@ varying_matches::store_locations() const consumer_var->data.location = VARYING_SLOT_VAR0 + slot; consumer_var->data.location_frac = offset; } + + /* Find locations suitable for native packing via + * ARB_enhanced_layouts. + */ + if (producer_var && consumer_var) { + if (enhanced_layouts_enabled) { + const glsl_type *type = + get_varying_type(producer_var, producer_stage); + if (type->is_array() || type->is_matrix() || type->is_record() || + type->is_double()) { + unsigned comp_slots = type->component_slots() + offset; + unsigned slots = comp_slots / 4; + if (comp_slots % 4) + slots += 1; + + for (unsigned j = 0; j < slots; j++) { + pack_loc[slot + j] = true; + } + } else if (offset + type->vector_elements > 4) { + pack_loc[slot] = true; + pack_loc[slot + 1] = true; + } else { + loc_type[slot][offset] = type; + } + } + } + } + + /* Attempt to use ARB_enhanced_layouts for more efficient packing if + * suitable. 
+ */ + if (enhanced_layouts_enabled) { + for (unsigned i = 0; i < this->num_matches; i++) { + ir_variable *producer_var = this->matches[i].producer_var; + ir_variable *consumer_var = this->matches[i].consumer_var; + unsigned generic_location = this->matches[i].generic_location; + unsigned slot = generic_location / 4; + + if (pack_loc[slot] || !producer_var || !consumer_var) + continue; + + const glsl_type *type = + get_varying_type(producer_var, producer_stage); + bool type_match = true; + for (unsigned j = 0; j < 4; j++) { + if (loc_type[slot][j]) { + if (type->base_type != loc_type[slot][j]->base_type) + type_match = false; + } + } + + if (type_match) { + producer_var->data.explicit_location = 1; + consumer_var->data.explicit_location = 1; + producer_var->data.explicit_component = 1; + consumer_var->data.explicit_component = 1; + } + } } } @@ -1607,10 +1997,17 @@ varying_matches::compute_packing_class(const ir_variable *var) * * Therefore, the packing class depends only on the interpolation type. */ - unsigned packing_class = var->data.centroid | (var->data.sample << 1) | - (var->data.patch << 2); - packing_class *= 4; - packing_class += var->data.interpolation; + const unsigned interp = var->is_interpolation_flat() + ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation; + + assert(interp < (1 << 3)); + + const unsigned packing_class = (interp << 0) | + (var->data.centroid << 3) | + (var->data.sample << 4) | + (var->data.patch << 5) | + (var->data.must_be_shader_input << 6); + return packing_class; } @@ -1625,7 +2022,7 @@ varying_matches::compute_packing_order(const ir_variable *var) { const glsl_type *element_type = var->type; - while (element_type->base_type == GLSL_TYPE_ARRAY) { + while (element_type->is_array()) { element_type = element_type->fields.array; } @@ -1667,7 +2064,7 @@ varying_matches::xfb_comparator(const void *x_generic, const void *y_generic) const match *x = (const match *) x_generic; if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) - return match_comparator(x_generic, y_generic); + return match_comparator(x_generic, y_generic); /* FIXME: When the comparator returns 0 it means the elements being * compared are equivalent. 
However the qsort documentation says: @@ -1735,25 +2132,27 @@ public: this->toplevel_var = var; this->varying_floats = 0; - program_resource_visitor::process(var); + program_resource_visitor::process(var, false); } private: virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) + bool /* row_major */, + const glsl_type * /* record_type */, + const enum glsl_interface_packing, + bool /* last_field */) { assert(!type->without_array()->is_record()); assert(!type->without_array()->is_interface()); - (void) row_major; - tfeedback_candidate *candidate = rzalloc(this->mem_ctx, tfeedback_candidate); candidate->toplevel_var = this->toplevel_var; candidate->type = type; candidate->offset = this->varying_floats; - hash_table_insert(this->tfeedback_candidates, candidate, - ralloc_strdup(this->mem_ctx, name)); + _mesa_hash_table_insert(this->tfeedback_candidates, + ralloc_strdup(this->mem_ctx, name), + candidate); this->varying_floats += type->component_slots(); } @@ -1824,11 +2223,12 @@ populate_consumer_input_sets(void *mem_ctx, exec_list *ir, ralloc_asprintf(mem_ctx, "%s.%s", input_var->get_interface_type()->without_array()->name, input_var->name); - hash_table_insert(consumer_interface_inputs, input_var, - iface_field_name); + _mesa_hash_table_insert(consumer_interface_inputs, + iface_field_name, input_var); } else { - hash_table_insert(consumer_inputs, input_var, - ralloc_strdup(mem_ctx, input_var->name)); + _mesa_hash_table_insert(consumer_inputs, + ralloc_strdup(mem_ctx, input_var->name), + input_var); } } } @@ -1856,12 +2256,11 @@ get_matching_input(void *mem_ctx, ralloc_asprintf(mem_ctx, "%s.%s", output_var->get_interface_type()->without_array()->name, output_var->name); - input_var = - (ir_variable *) hash_table_find(consumer_interface_inputs, - iface_field_name); + hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); + input_var = entry ? (ir_variable *) entry->data : NULL; } else { - input_var = - (ir_variable *) hash_table_find(consumer_inputs, output_var->name); + hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name); + input_var = entry ? (ir_variable *) entry->data : NULL; } return (input_var == NULL || input_var->data.mode != ir_var_shader_in) @@ -1938,7 +2337,8 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) * with a max of MAX_VARYING. */ static uint64_t -reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) +reserved_varying_slot(struct gl_linked_shader *stage, + ir_variable_mode io_mode) { assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); /* Avoid an overflow of the returned value */ @@ -1961,7 +2361,8 @@ reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) var_slot = var->data.location - VARYING_SLOT_VAR0; unsigned num_elements = get_varying_type(var, stage->Stage) - ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX); + ->count_attribute_slots(io_mode == ir_var_shader_in && + stage->Stage == MESA_SHADER_VERTEX); for (unsigned i = 0; i < num_elements; i++) { if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) slots |= UINT64_C(1) << var_slot; @@ -1993,13 +2394,15 @@ reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) * be NULL. In this case, varying locations are assigned solely based on the * requirements of transform feedback. 
*/ -bool +static bool assign_varying_locations(struct gl_context *ctx, void *mem_ctx, struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer, + gl_linked_shader *producer, + gl_linked_shader *consumer, unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls) + tfeedback_decl *tfeedback_decls, + const uint64_t reserved_slots) { /* Tessellation shaders treat inputs and outputs as shared memory and can * access inputs and outputs of other invocations. @@ -2018,51 +2421,33 @@ assign_varying_locations(struct gl_context *ctx, bool xfb_enabled = ctx->Extensions.EXT_transform_feedback && !unpackable_tess; - /* Disable varying packing for GL 4.4+ as there is no guarantee - * that interpolation qualifiers will match between shaders in these - * versions. We also disable packing on outward facing interfaces for - * SSO because in ES we need to retain the unpacked varying information - * for draw time validation. For desktop GL we could allow packing for - * versions < 4.4 but it's just safer not to do packing. + /* Disable packing on outward facing interfaces for SSO because in ES we + * need to retain the unpacked varying information for draw time + * validation. * * Packing is still enabled on individual arrays, structs, and matrices as * these are required by the transform feedback code and it is still safe * to do so. We also enable packing when a varying is only used for * transform feedback and its not a SSO. - * - * Varying packing currently only packs together varyings with matching - * interpolation qualifiers as the backends assume all packed components - * are to be processed in the same way. Therefore we cannot do packing in - * these versions of GL without the risk of mismatching interfaces. - * - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: - * - * "The type and presence of interpolation qualifiers of variables with - * the same name declared in all linked shaders for the same cross-stage - * interface must match, otherwise the link command will fail. - * - * When comparing an output from one stage to an input of a subsequent - * stage, the input and output don't match if their interpolation - * qualifiers (or lack thereof) are not the same." - * - * This text was also in at least revison 7 of the 4.40 spec but is no - * longer in revision 9 and not in the 4.50 spec. */ bool disable_varying_packing = ctx->Const.DisableVaryingPacking || unpackable_tess; - if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || - (prog->SeparateShader && (producer == NULL || consumer == NULL))) + if (prog->SeparateShader && (producer == NULL || consumer == NULL)) disable_varying_packing = true; varying_matches matches(disable_varying_packing, xfb_enabled, - producer ? producer->Stage : (gl_shader_stage)-1, - consumer ? consumer->Stage : (gl_shader_stage)-1); - hash_table *tfeedback_candidates - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); - hash_table *consumer_inputs - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); - hash_table *consumer_interface_inputs - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + ctx->Extensions.ARB_enhanced_layouts, + producer ? producer->Stage : MESA_SHADER_NONE, + consumer ? 
consumer->Stage : MESA_SHADER_NONE); + hash_table *tfeedback_candidates = + _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + hash_table *consumer_inputs = + _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + hash_table *consumer_interface_inputs = + _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { NULL, }; @@ -2128,7 +2513,7 @@ assign_varying_locations(struct gl_context *ctx, * within a patch and can be used as shared memory. */ if (input_var || (prog->SeparateShader && consumer == NULL) || - producer->Type == GL_TESS_CONTROL_SHADER) { + producer->Stage == MESA_SHADER_TESS_CTRL) { matches.record(output_var, input_var); } @@ -2149,11 +2534,9 @@ assign_varying_locations(struct gl_context *ctx, */ foreach_in_list(ir_instruction, node, consumer->ir) { ir_variable *const input_var = node->as_variable(); - - if (input_var == NULL || input_var->data.mode != ir_var_shader_in) - continue; - - matches.record(NULL, input_var); + if (input_var && input_var->data.mode == ir_var_shader_in) { + matches.record(NULL, input_var); + } } } @@ -2165,40 +2548,52 @@ assign_varying_locations(struct gl_context *ctx, = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); if (matched_candidate == NULL) { - hash_table_dtor(tfeedback_candidates); - hash_table_dtor(consumer_inputs); - hash_table_dtor(consumer_interface_inputs); + _mesa_hash_table_destroy(tfeedback_candidates, NULL); return false; } + /* Mark xfb varyings as always active */ + matched_candidate->toplevel_var->data.always_active_io = 1; + + /* Mark any corresponding inputs as always active also. We must do this + * because we have a NIR pass that lowers vectors to scalars and another + * that removes unused varyings. + * We don't split varyings marked as always active because there is no + * point in doing so. This means we need to mark both sides of the + * interface as always active otherwise we will have a mismatch and + * start removing things we shouldn't. 
+ */ + ir_variable *const input_var = + linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var, + consumer_inputs, + consumer_interface_inputs, + consumer_inputs_with_locations); + if (input_var) + input_var->data.always_active_io = 1; + if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { matched_candidate->toplevel_var->data.is_xfb_only = 1; matches.record(matched_candidate->toplevel_var, NULL); } } - const uint64_t reserved_slots = - reserved_varying_slot(producer, ir_var_shader_out) | - reserved_varying_slot(consumer, ir_var_shader_in); + _mesa_hash_table_destroy(consumer_inputs, NULL); + _mesa_hash_table_destroy(consumer_interface_inputs, NULL); - const unsigned slots_used = matches.assign_locations(prog, reserved_slots); + uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; + const unsigned slots_used = matches.assign_locations( + prog, components, reserved_slots); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { - if (!tfeedback_decls[i].is_varying()) - continue; - - if (!tfeedback_decls[i].assign_location(ctx, prog)) { - hash_table_dtor(tfeedback_candidates); - hash_table_dtor(consumer_inputs); - hash_table_dtor(consumer_interface_inputs); - return false; + if (tfeedback_decls[i].is_varying()) { + if (!tfeedback_decls[i].assign_location(ctx, prog)) { + _mesa_hash_table_destroy(tfeedback_candidates, NULL); + return false; + } } } - - hash_table_dtor(tfeedback_candidates); - hash_table_dtor(consumer_inputs); - hash_table_dtor(consumer_interface_inputs); + _mesa_hash_table_destroy(tfeedback_candidates, NULL); if (consumer && producer) { foreach_in_list(ir_instruction, node, consumer->ir) { @@ -2206,7 +2601,7 @@ assign_varying_locations(struct gl_context *ctx, if (var && var->data.mode == ir_var_shader_in && var->data.is_unmatched_generic_inout) { - if (!prog->IsES && prog->Version <= 120) { + if (!prog->IsES && prog->data->Version <= 120) { /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: * * Only those varying variables used (i.e. 
read) in @@ -2246,13 +2641,13 @@ assign_varying_locations(struct gl_context *ctx, } if (producer) { - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, + lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out, 0, producer, disable_varying_packing, xfb_enabled); } if (consumer) { - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, + lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in, consumer_vertices, consumer, disable_varying_packing, xfb_enabled); } @@ -2260,17 +2655,19 @@ assign_varying_locations(struct gl_context *ctx, return true; } -bool +static bool check_against_output_limit(struct gl_context *ctx, struct gl_shader_program *prog, - gl_shader *producer) + gl_linked_shader *producer, + unsigned num_explicit_locations) { - unsigned output_vectors = 0; + unsigned output_vectors = num_explicit_locations; foreach_in_list(ir_instruction, node, producer->ir) { ir_variable *const var = node->as_variable(); - if (var && var->data.mode == ir_var_shader_out && + if (var && !var->data.explicit_location && + var->data.mode == ir_var_shader_out && var_counts_against_varying_limit(producer->Stage, var)) { /* outputs for fragment shader can't be doubles */ output_vectors += var->type->count_attribute_slots(false); @@ -2302,17 +2699,19 @@ check_against_output_limit(struct gl_context *ctx, return true; } -bool +static bool check_against_input_limit(struct gl_context *ctx, struct gl_shader_program *prog, - gl_shader *consumer) + gl_linked_shader *consumer, + unsigned num_explicit_locations) { - unsigned input_vectors = 0; + unsigned input_vectors = num_explicit_locations; foreach_in_list(ir_instruction, node, consumer->ir) { ir_variable *const var = node->as_variable(); - if (var && var->data.mode == ir_var_shader_in && + if (var && !var->data.explicit_location && + var->data.mode == ir_var_shader_in && var_counts_against_varying_limit(consumer->Stage, var)) { /* vertex inputs aren't varying counted */ input_vectors += var->type->count_attribute_slots(false); @@ -2343,3 +2742,160 @@ check_against_input_limit(struct gl_context *ctx, return true; } + +bool +link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, + struct gl_context *ctx, void *mem_ctx) +{ + bool has_xfb_qualifiers = false; + unsigned num_tfeedback_decls = 0; + char **varying_names = NULL; + tfeedback_decl *tfeedback_decls = NULL; + + /* From the ARB_enhanced_layouts spec: + * + * "If the shader used to record output variables for transform feedback + * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout + * qualifiers, the values specified by TransformFeedbackVaryings are + * ignored, and the set of variables captured for transform feedback is + * instead derived from the specified layout qualifiers." 
+ */ + for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) { + /* Find last stage before fragment shader */ + if (prog->_LinkedShaders[i]) { + has_xfb_qualifiers = + process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i], + prog, &num_tfeedback_decls, + &varying_names); + break; + } + } + + if (!has_xfb_qualifiers) { + num_tfeedback_decls = prog->TransformFeedback.NumVarying; + varying_names = prog->TransformFeedback.VaryingNames; + } + + if (num_tfeedback_decls != 0) { + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the specified by TransformFeedbackVaryingsEXT is + * non-zero, but the program object has no vertex or geometry + * shader; + */ + if (first >= MESA_SHADER_FRAGMENT) { + linker_error(prog, "Transform feedback varyings specified, but " + "no vertex, tessellation, or geometry shader is " + "present.\n"); + return false; + } + + tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl, + num_tfeedback_decls); + if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls, + varying_names, tfeedback_decls)) + return false; + } + + /* If there is no fragment shader we need to set transform feedback. + * + * For SSO we also need to assign output locations. We assign them here + * because we need to do it for both single stage programs and multi stage + * programs. + */ + if (last < MESA_SHADER_FRAGMENT && + (num_tfeedback_decls != 0 || prog->SeparateShader)) { + const uint64_t reserved_out_slots = + reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out); + if (!assign_varying_locations(ctx, mem_ctx, prog, + prog->_LinkedShaders[last], NULL, + num_tfeedback_decls, tfeedback_decls, + reserved_out_slots)) + return false; + } + + if (last <= MESA_SHADER_FRAGMENT) { + /* Remove unused varyings from the first/last stage unless SSO */ + remove_unused_shader_inputs_and_outputs(prog->SeparateShader, + prog->_LinkedShaders[first], + ir_var_shader_in); + remove_unused_shader_inputs_and_outputs(prog->SeparateShader, + prog->_LinkedShaders[last], + ir_var_shader_out); + + /* If the program is made up of only a single stage */ + if (first == last) { + gl_linked_shader *const sh = prog->_LinkedShaders[last]; + + do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL); + do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, + tfeedback_decls); + + if (prog->SeparateShader) { + const uint64_t reserved_slots = + reserved_varying_slot(sh, ir_var_shader_in); + + /* Assign input locations for SSO, output locations are already + * assigned. + */ + if (!assign_varying_locations(ctx, mem_ctx, prog, + NULL /* producer */, + sh /* consumer */, + 0 /* num_tfeedback_decls */, + NULL /* tfeedback_decls */, + reserved_slots)) + return false; + } + } else { + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage + * are eliminated if they are (transitively) not used in a later + * stage. + */ + int next = last; + for (int i = next - 1; i >= 0; i--) { + if (prog->_LinkedShaders[i] == NULL && i != 0) + continue; + + gl_linked_shader *const sh_i = prog->_LinkedShaders[i]; + gl_linked_shader *const sh_next = prog->_LinkedShaders[next]; + + const uint64_t reserved_out_slots = + reserved_varying_slot(sh_i, ir_var_shader_out); + const uint64_t reserved_in_slots = + reserved_varying_slot(sh_next, ir_var_shader_in); + + do_dead_builtin_varyings(ctx, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? 
num_tfeedback_decls : 0, + tfeedback_decls); + + if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, + next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, + tfeedback_decls, + reserved_out_slots | reserved_in_slots)) + return false; + + /* This must be done after all dead varyings are eliminated. */ + if (sh_i != NULL) { + unsigned slots_used = _mesa_bitcount_64(reserved_out_slots); + if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) { + return false; + } + } + + unsigned slots_used = _mesa_bitcount_64(reserved_in_slots); + if (!check_against_input_limit(ctx, prog, sh_next, slots_used)) + return false; + + next = i; + } + } + } + + if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls, + has_xfb_qualifiers)) + return false; + + return true; +}
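
A side note on the hash-table conversion that runs through this patch (program/hash_table.h to util/hash_table.h): the snippet below is a minimal sketch of the new pattern as used in tfeedback_decl::find_candidate() and linker::get_matching_input(). The wrapper name lookup_example and its arguments are illustrative only (not part of the patch), and the sketch assumes a Mesa build tree for the util/hash_table.h header.

#include "util/hash_table.h"

/* Sketch of the util/hash_table.h usage this patch converts to.  Keys are
 * C strings; `value` stands in for the tfeedback_candidate or ir_variable
 * pointers stored by the real code.
 */
static void *
lookup_example(const char *name, void *value)
{
   /* The util constructor takes a ralloc context plus hash/compare
    * callbacks instead of the old (size, hash, compare) arguments.
    */
   struct hash_table *table =
      _mesa_hash_table_create(NULL, _mesa_key_hash_string,
                              _mesa_key_string_equal);

   /* Insertion takes (table, key, data); the old API took (table, data, key). */
   _mesa_hash_table_insert(table, name, value);

   /* Lookups return a hash_entry rather than the stored pointer itself. */
   struct hash_entry *entry = _mesa_hash_table_search(table, name);
   void *result = entry ? entry->data : NULL;

   _mesa_hash_table_destroy(table, NULL);
   return result;
}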