*/
+#include "main/errors.h"
#include "main/mtypes.h"
#include "glsl_symbol_table.h"
#include "glsl_parser_extras.h"
#include "linker.h"
#include "link_varyings.h"
#include "main/macros.h"
-#include "program/hash_table.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
#include "program.h"
create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
NULL, NULL, varying_names);
- } else if (t->is_record()) {
+ } else if (t->is_struct()) {
for (unsigned i = 0; i < t->length; i++) {
const char *field = t->fields.structure[i].name;
size_t new_length = name_length;
new_length, count, NULL, NULL,
varying_names);
}
- } else if (t->without_array()->is_record() ||
+ } else if (t->without_array()->is_struct() ||
t->without_array()->is_interface() ||
(t->is_array() && t->fields.array->is_array())) {
for (unsigned i = 0; i < t->length; i++) {
}
}
-bool
-process_xfb_layout_qualifiers(void *mem_ctx, const gl_shader *sh,
+static bool
+process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
+ struct gl_shader_program *prog,
unsigned *num_tfeedback_decls,
char ***varying_names)
{
* xfb_stride to interface block members so this will catch that case also.
*/
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
- if (sh->TransformFeedback.BufferStride[j]) {
+ if (prog->TransformFeedback.BufferStride[j]) {
has_xfb_qualifiers = true;
+ break;
}
}
if (var->data.from_named_ifc_block) {
type = var->get_interface_type();
+
/* Find the member type before it was altered by lowering */
+ const glsl_type *type_wa = type->without_array();
member_type =
- type->fields.structure[type->field_index(var->name)].type;
- name = ralloc_strdup(NULL, type->without_array()->name);
+ type_wa->fields.structure[type_wa->field_index(var->name)].type;
+ name = ralloc_strdup(NULL, type_wa->name);
} else {
type = var->type;
member_type = NULL;
return has_xfb_qualifiers;
}
-static bool
-anonymous_struct_type_matches(const glsl_type *output_type,
- const glsl_type *to_match)
-{
- while (output_type->is_array() && to_match->is_array()) {
- /* if the lengths at each level don't match fail. */
- if (output_type->length != to_match->length)
- return false;
- output_type = output_type->fields.array;
- to_match = to_match->fields.array;
- }
-
- if (output_type->is_array() || to_match->is_array())
- return false;
- return output_type->is_anonymous() &&
- to_match->is_anonymous() &&
- to_match->record_compare(output_type);
-}
-
/**
* Validate the types and qualifiers of an output from one stage against the
* matching input to another stage.
*/
static void
-cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
+cross_validate_types_and_qualifiers(struct gl_context *ctx,
+ struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *output,
gl_shader_stage consumer_stage,
}
if (type_to_match != output->type) {
- /* There is a bit of a special case for gl_TexCoord. This
- * built-in is unsized by default. Applications that variable
- * access it must redeclare it with a size. There is some
- * language in the GLSL spec that implies the fragment shader
- * and vertex shader do not have to agree on this size. Other
- * driver behave this way, and one or two applications seem to
- * rely on it.
- *
- * Neither declaration needs to be modified here because the array
- * sizes are fixed later when update_array_sizes is called.
- *
- * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
- *
- * "Unlike user-defined varying variables, the built-in
- * varying variables don't have a strict one-to-one
- * correspondence between the vertex language and the
- * fragment language."
- */
- if (!output->type->is_array() || !is_gl_identifier(output->name)) {
- bool anon_matches = anonymous_struct_type_matches(output->type, type_to_match);
-
- if (!anon_matches) {
+ if (output->type->is_struct()) {
+ /* Structures across shader stages can have different name
+ * and considered to match in type if and only if structure
+ * members match in name, type, qualification, and declaration
+ * order. The precision doesn't need to match.
+ */
+ if (!output->type->record_compare(type_to_match,
+ false, /* match_name */
+ true, /* match_locations */
+ false /* match_precision */)) {
linker_error(prog,
- "%s shader output `%s' declared as type `%s', "
- "but %s shader input declared as type `%s'\n",
- _mesa_shader_stage_to_string(producer_stage),
- output->name,
- output->type->name,
- _mesa_shader_stage_to_string(consumer_stage),
- input->type->name);
- return;
+ "%s shader output `%s' declared as struct `%s', "
+ "doesn't match in type with %s shader input "
+ "declared as struct `%s'\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ output->type->name,
+ _mesa_shader_stage_to_string(consumer_stage),
+ input->type->name);
}
+ } else if (!output->type->is_array() || !is_gl_identifier(output->name)) {
+ /* There is a bit of a special case for gl_TexCoord. This
+ * built-in is unsized by default. Applications that access it
+ * must redeclare it with a size. There is some
+ * language in the GLSL spec that implies the fragment shader
+ * and vertex shader do not have to agree on this size. Other
+ * driver behave this way, and one or two applications seem to
+ * rely on it.
+ *
+ * Neither declaration needs to be modified here because the array
+ * sizes are fixed later when update_array_sizes is called.
+ *
+ * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "Unlike user-defined varying variables, the built-in
+ * varying variables don't have a strict one-to-one
+ * correspondence between the vertex language and the
+ * fragment language."
+ */
+ linker_error(prog,
+ "%s shader output `%s' declared as type `%s', "
+ "but %s shader input declared as type `%s'\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ output->type->name,
+ _mesa_shader_stage_to_string(consumer_stage),
+ input->type->name);
+ return;
}
}
* OpenGLES 3.0 drivers, so we relax the checking in all cases.
*/
if (false /* always skip the centroid check */ &&
- prog->Version < (prog->IsES ? 310 : 430) &&
+ prog->data->Version < (prog->IsES ? 310 : 430) &&
input->data.centroid != output->data.centroid) {
linker_error(prog,
"%s shader output `%s' %s centroid qualifier, "
return;
}
- if (!prog->IsES && input->data.invariant != output->data.invariant) {
+ /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+ *
+ * "As only outputs need be declared with invariant, an output from
+ * one shader stage will still match an input of a subsequent stage
+ * without the input being declared as invariant."
+ *
+ * while GLSL 4.20 says:
+ *
+ * "For variables leaving one shader and coming into another shader,
+ * the invariant keyword has to be used in both shaders, or a link
+ * error will result."
+ *
+ * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
+ *
+ * "The invariance of varyings that are declared in both the vertex
+ * and fragment shaders must match."
+ */
+ if (input->data.explicit_invariant != output->data.explicit_invariant &&
+ prog->data->Version < (prog->IsES ? 300 : 430)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
- (output->data.invariant) ? "has" : "lacks",
+ (output->data.explicit_invariant) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
- (input->data.invariant) ? "has" : "lacks");
+ (input->data.explicit_invariant) ? "has" : "lacks");
return;
}
* "It is a link-time error if, within the same stage, the interpolation
* qualifiers of variables of the same name do not match.
*
+ * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
+ *
+ * "When no interpolation qualifier is present, smooth interpolation
+ * is used."
+ *
+ * So we match variables where one is smooth and the other has no explicit
+ * qualifier.
*/
- if (input->data.interpolation != output->data.interpolation &&
- prog->Version < 440) {
- linker_error(prog,
- "%s shader output `%s' specifies %s "
- "interpolation qualifier, "
- "but %s shader input specifies %s "
- "interpolation qualifier\n",
- _mesa_shader_stage_to_string(producer_stage),
- output->name,
- interpolation_string(output->data.interpolation),
- _mesa_shader_stage_to_string(consumer_stage),
- interpolation_string(input->data.interpolation));
- return;
+ unsigned input_interpolation = input->data.interpolation;
+ unsigned output_interpolation = output->data.interpolation;
+ if (prog->IsES) {
+ if (input_interpolation == INTERP_MODE_NONE)
+ input_interpolation = INTERP_MODE_SMOOTH;
+ if (output_interpolation == INTERP_MODE_NONE)
+ output_interpolation = INTERP_MODE_SMOOTH;
+ }
+ if (input_interpolation != output_interpolation &&
+ prog->data->Version < 440) {
+ if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
+ linker_error(prog,
+ "%s shader output `%s' specifies %s "
+ "interpolation qualifier, "
+ "but %s shader input specifies %s "
+ "interpolation qualifier\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ interpolation_string(output->data.interpolation),
+ _mesa_shader_stage_to_string(consumer_stage),
+ interpolation_string(input->data.interpolation));
+ return;
+ } else {
+ linker_warning(prog,
+ "%s shader output `%s' specifies %s "
+ "interpolation qualifier, "
+ "but %s shader input specifies %s "
+ "interpolation qualifier\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ interpolation_string(output->data.interpolation),
+ _mesa_shader_stage_to_string(consumer_stage),
+ interpolation_string(input->data.interpolation));
+ }
}
}
* Validate front and back color outputs against single color input
*/
static void
-cross_validate_front_and_back_color(struct gl_shader_program *prog,
+cross_validate_front_and_back_color(struct gl_context *ctx,
+ struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *front_color,
const ir_variable *back_color,
gl_shader_stage producer_stage)
{
if (front_color != NULL && front_color->data.assigned)
- cross_validate_types_and_qualifiers(prog, input, front_color,
+ cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
consumer_stage, producer_stage);
if (back_color != NULL && back_color->data.assigned)
- cross_validate_types_and_qualifiers(prog, input, back_color,
+ cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
consumer_stage, producer_stage);
}
+/**
+ * Translate a variable's absolute location into a zero-based slot index
+ * relative to the first generic location for its stage and mode:
+ * VERT_ATTRIB_GENERIC0 for VS inputs, VARYING_SLOT_PATCH0 for tessellation
+ * patch varyings, FRAG_RESULT_DATA0 for FS outputs, and VARYING_SLOT_VAR0
+ * for everything else.
+ */
+static unsigned
+compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
+{
+   unsigned location_start = VARYING_SLOT_VAR0;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      if (var->data.mode == ir_var_shader_in)
+         location_start = VERT_ATTRIB_GENERIC0;
+      break;
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+      /* Per-patch varyings live in their own location namespace. */
+      if (var->data.patch)
+         location_start = VARYING_SLOT_PATCH0;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      if (var->data.mode == ir_var_shader_out)
+         location_start = FRAG_RESULT_DATA0;
+      break;
+   default:
+      break;
+   }
+
+   return var->data.location - location_start;
+}
+
+/* Per-location, per-component record of an explicitly-located varying.
+ * Used by check_location_aliasing() to verify that variables sharing a
+ * location agree in underlying numerical type, bit size, interpolation
+ * and auxiliary storage qualification.
+ */
+struct explicit_location_info {
+   ir_variable *var;
+   bool base_type_is_integer;
+   unsigned base_type_bit_size;
+   unsigned interpolation;
+   bool centroid;
+   bool sample;
+   bool patch;
+};
+
+/**
+ * Check that a variable with an explicit location/component does not
+ * illegally alias any previously-seen variable in the same stage, then
+ * record it in \c explicit_locations.
+ *
+ * Aliasing rules enforced (OpenGL 4.60.5, section 4.4.1): components may
+ * never overlap; aliases sharing a location must have the same underlying
+ * numerical type, bit size, interpolation and auxiliary storage
+ * qualification; structs never alias anything.
+ *
+ * Returns false (after emitting a linker error) on any violation.
+ */
+static bool
+check_location_aliasing(struct explicit_location_info explicit_locations[][4],
+                        ir_variable *var,
+                        unsigned location,
+                        unsigned component,
+                        unsigned location_limit,
+                        const glsl_type *type,
+                        unsigned interpolation,
+                        bool centroid,
+                        bool sample,
+                        bool patch,
+                        gl_shader_program *prog,
+                        gl_shader_stage stage)
+{
+   unsigned last_comp;
+   unsigned base_type_bit_size;
+   const glsl_type *type_without_array = type->without_array();
+   const bool base_type_is_integer =
+      glsl_base_type_is_integer(type_without_array->base_type);
+   const bool is_struct = type_without_array->is_struct();
+   if (is_struct) {
+      /* structs don't have a defined underlying base type so just treat all
+       * component slots as used and set the bit size to 0. If there is
+       * location aliasing, we'll fail anyway later.
+       */
+      last_comp = 4;
+      base_type_bit_size = 0;
+   } else {
+      /* 64-bit types consume two components per vector element. */
+      unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
+      last_comp = component + type_without_array->vector_elements * dmul;
+      base_type_bit_size =
+         glsl_base_type_get_bit_size(type_without_array->base_type);
+   }
+
+   /* Walk every location slot this variable covers, and within each, every
+    * component, comparing against what has already been recorded there.
+    */
+   while (location < location_limit) {
+      unsigned comp = 0;
+      while (comp < 4) {
+         struct explicit_location_info *info =
+            &explicit_locations[location][comp];
+
+         if (info->var) {
+            if (info->var->type->without_array()->is_struct() || is_struct) {
+               /* Structs cannot share location since they are incompatible
+                * with any other underlying numerical type.
+                */
+               linker_error(prog,
+                            "%s shader has multiple %sputs sharing the "
+                            "same location that don't have the same "
+                            "underlying numerical type. Struct variable '%s', "
+                            "location %u\n",
+                            _mesa_shader_stage_to_string(stage),
+                            var->data.mode == ir_var_shader_in ? "in" : "out",
+                            is_struct ? var->name : info->var->name,
+                            location);
+               return false;
+            } else if (comp >= component && comp < last_comp) {
+               /* Component aliasing is not allowed */
+               linker_error(prog,
+                            "%s shader has multiple %sputs explicitly "
+                            "assigned to location %d and component %d\n",
+                            _mesa_shader_stage_to_string(stage),
+                            var->data.mode == ir_var_shader_in ? "in" : "out",
+                            location, comp);
+               return false;
+            } else {
+               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
+                * Qualifiers, Page 67, (Location aliasing):
+                *
+                * " Further, when location aliasing, the aliases sharing the
+                *   location must have the same underlying numerical type
+                *   and bit width (floating-point or integer, 32-bit versus
+                *   64-bit, etc.) and the same auxiliary storage and
+                *   interpolation qualification."
+                */
+
+               /* If the underlying numerical type isn't integer, implicitly
+                * it will be float or else we would have failed by now.
+                */
+               if (info->base_type_is_integer != base_type_is_integer) {
+                  linker_error(prog,
+                               "%s shader has multiple %sputs sharing the "
+                               "same location that don't have the same "
+                               "underlying numerical type. Location %u "
+                               "component %u.\n",
+                               _mesa_shader_stage_to_string(stage),
+                               var->data.mode == ir_var_shader_in ?
+                               "in" : "out", location, comp);
+                  return false;
+               }
+
+               if (info->base_type_bit_size != base_type_bit_size) {
+                  linker_error(prog,
+                               "%s shader has multiple %sputs sharing the "
+                               "same location that don't have the same "
+                               "underlying numerical bit size. Location %u "
+                               "component %u.\n",
+                               _mesa_shader_stage_to_string(stage),
+                               var->data.mode == ir_var_shader_in ?
+                               "in" : "out", location, comp);
+                  return false;
+               }
+
+               if (info->interpolation != interpolation) {
+                  linker_error(prog,
+                               "%s shader has multiple %sputs sharing the "
+                               "same location that don't have the same "
+                               "interpolation qualification. Location %u "
+                               "component %u.\n",
+                               _mesa_shader_stage_to_string(stage),
+                               var->data.mode == ir_var_shader_in ?
+                               "in" : "out", location, comp);
+                  return false;
+               }
+
+               if (info->centroid != centroid ||
+                   info->sample != sample ||
+                   info->patch != patch) {
+                  linker_error(prog,
+                               "%s shader has multiple %sputs sharing the "
+                               "same location that don't have the same "
+                               "auxiliary storage qualification. Location %u "
+                               "component %u.\n",
+                               _mesa_shader_stage_to_string(stage),
+                               var->data.mode == ir_var_shader_in ?
+                               "in" : "out", location, comp);
+                  return false;
+               }
+            }
+         } else if (comp >= component && comp < last_comp) {
+            /* Slot/component is free: claim it for this variable. */
+            info->var = var;
+            info->base_type_is_integer = base_type_is_integer;
+            info->base_type_bit_size = base_type_bit_size;
+            info->interpolation = interpolation;
+            info->centroid = centroid;
+            info->sample = sample;
+            info->patch = patch;
+         }
+
+         comp++;
+
+         /* We need to do some special handling for doubles as dvec3 and
+          * dvec4 consume two consecutive locations. We don't need to
+          * worry about components beginning at anything other than 0 as
+          * the spec does not allow this for dvec3 and dvec4.
+          */
+         if (comp == 4 && last_comp > 4) {
+            last_comp = last_comp - 4;
+            /* Bump location index and reset the component index */
+            location++;
+            comp = 0;
+            component = 0;
+         }
+      }
+
+      location++;
+   }
+
+   return true;
+}
+
+/**
+ * Validate a single explicitly-located input or output of a linked shader:
+ * range-check its location span against the stage's component limits, then
+ * run the location-aliasing checks (per interface-block member when the
+ * variable is an interface block, otherwise on the variable itself).
+ *
+ * Returns false (after emitting a linker error) on any violation.
+ */
+static bool
+validate_explicit_variable_location(struct gl_context *ctx,
+                                    struct explicit_location_info explicit_locations[][4],
+                                    ir_variable *var,
+                                    gl_shader_program *prog,
+                                    gl_linked_shader *sh)
+{
+   const glsl_type *type = get_varying_type(var, sh->Stage);
+   unsigned num_elements = type->count_attribute_slots(false);
+   unsigned idx = compute_variable_location_slot(var, sh->Stage);
+   unsigned slot_limit = idx + num_elements;
+
+   /* Vertex shader inputs and fragment shader outputs are validated in
+    * assign_attribute_or_color_locations() so we should not attempt to
+    * validate them again here.
+    */
+   unsigned slot_max;
+   if (var->data.mode == ir_var_shader_out) {
+      assert(sh->Stage != MESA_SHADER_FRAGMENT);
+      slot_max =
+         ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
+   } else {
+      assert(var->data.mode == ir_var_shader_in);
+      assert(sh->Stage != MESA_SHADER_VERTEX);
+      slot_max =
+         ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
+   }
+
+   if (slot_limit > slot_max) {
+      linker_error(prog,
+                   "Invalid location %u in %s shader\n",
+                   idx, _mesa_shader_stage_to_string(sh->Stage));
+      return false;
+   }
+
+   const glsl_type *type_without_array = type->without_array();
+   if (type_without_array->is_interface()) {
+      /* For interface blocks, each member carries its own location and
+       * qualifiers, so alias-check the members individually.
+       */
+      for (unsigned i = 0; i < type_without_array->length; i++) {
+         glsl_struct_field *field = &type_without_array->fields.structure[i];
+         unsigned field_location = field->location -
+            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
+         if (!check_location_aliasing(explicit_locations, var,
+                                      field_location,
+                                      0, field_location + 1,
+                                      field->type,
+                                      field->interpolation,
+                                      field->centroid,
+                                      field->sample,
+                                      field->patch,
+                                      prog, sh->Stage)) {
+            return false;
+         }
+      }
+   } else if (!check_location_aliasing(explicit_locations, var,
+                                       idx, var->data.location_frac,
+                                       slot_limit, type,
+                                       var->data.interpolation,
+                                       var->data.centroid,
+                                       var->data.sample,
+                                       var->data.patch,
+                                       prog, sh->Stage)) {
+      return false;
+   }
+
+   return true;
+}
+
+/**
+ * Validate explicit locations for the inputs to the first stage and the
+ * outputs of the last stage in a program, if those are not the VS and FS
+ * shaders.
+ *
+ * Any violation is reported through linker_error(); this function returns
+ * early on the first failing variable.
+ */
+void
+validate_first_and_last_interface_explicit_locations(struct gl_context *ctx,
+                                                     struct gl_shader_program *prog,
+                                                     gl_shader_stage first_stage,
+                                                     gl_shader_stage last_stage)
+{
+   /* VS inputs and FS outputs are validated in
+    * assign_attribute_or_color_locations()
+    */
+   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
+   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
+   if (!validate_first_stage && !validate_last_stage)
+      return;
+
+   struct explicit_location_info explicit_locations[MAX_VARYING][4];
+
+   /* Pass 0 checks the first stage's inputs, pass 1 the last stage's
+    * outputs; the tracking table is reset between passes.
+    */
+   gl_shader_stage stages[2] = { first_stage, last_stage };
+   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
+   ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
+
+   for (unsigned i = 0; i < 2; i++) {
+      if (!validate_stage[i])
+         continue;
+
+      gl_shader_stage stage = stages[i];
+
+      gl_linked_shader *sh = prog->_LinkedShaders[stage];
+      assert(sh);
+
+      memset(explicit_locations, 0, sizeof(explicit_locations));
+
+      foreach_in_list(ir_instruction, node, sh->ir) {
+         ir_variable *const var = node->as_variable();
+
+         /* Only user-defined varyings (generic locations) with an explicit
+          * location in the direction being checked are of interest.
+          */
+         if (var == NULL ||
+             !var->data.explicit_location ||
+             var->data.location < VARYING_SLOT_VAR0 ||
+             var->data.mode != var_direction[i])
+            continue;
+
+         if (!validate_explicit_variable_location(
+               ctx, explicit_locations, var, prog, sh)) {
+            return;
+         }
+      }
+   }
+}
+
/**
* Validate that outputs from one stage match inputs of another
*/
void
-cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
- gl_shader *producer, gl_shader *consumer)
+cross_validate_outputs_to_inputs(struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ gl_linked_shader *producer,
+ gl_linked_shader *consumer)
{
glsl_symbol_table parameters;
- ir_variable *explicit_locations[MAX_VARYINGS_INCL_PATCH][4] =
- { {NULL, NULL} };
+ struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
+ struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};
/* Find all shader outputs in the "producer" stage.
*/
/* User-defined varyings with explicit locations are handled
* differently because they do not need to have matching names.
*/
- const glsl_type *type = get_varying_type(var, producer->Stage);
- unsigned num_elements = type->count_attribute_slots(false);
- unsigned idx = var->data.location - VARYING_SLOT_VAR0;
- unsigned slot_limit = idx + num_elements;
- unsigned last_comp;
-
- if (var->type->without_array()->is_record()) {
- /* The component qualifier can't be used on structs so just treat
- * all component slots as used.
- */
- last_comp = 4;
- } else {
- unsigned dmul = var->type->is_64bit() ? 2 : 1;
- last_comp = var->data.location_frac +
- var->type->without_array()->vector_elements * dmul;
- }
-
- while (idx < slot_limit) {
- unsigned i = var->data.location_frac;
- while (i < last_comp) {
- if (explicit_locations[idx][i] != NULL) {
- linker_error(prog,
- "%s shader has multiple outputs explicitly "
- "assigned to location %d and component %d\n",
- _mesa_shader_stage_to_string(producer->Stage),
- idx, var->data.location_frac);
- return;
- }
-
- /* Make sure all component at this location have the same type.
- */
- for (unsigned j = 0; j < 4; j++) {
- if (explicit_locations[idx][j] &&
- (explicit_locations[idx][j]->type->without_array()
- ->base_type != var->type->without_array()->base_type)) {
- linker_error(prog,
- "Varyings sharing the same location must "
- "have the same underlying numerical type. "
- "Location %u component %u\n", idx,
- var->data.location_frac);
- return;
- }
- }
-
- explicit_locations[idx][i] = var;
- i++;
-
- /* We need to do some special handling for doubles as dvec3 and
- * dvec4 consume two consecutive locations. We don't need to
- * worry about components beginning at anything other than 0 as
- * the spec does not allow this for dvec3 and dvec4.
- */
- if (i == 3 && last_comp > 4) {
- last_comp = last_comp - 4;
- /* Bump location index and reset the component index */
- idx++;
- i = 0;
- }
- }
- idx++;
+ if (!validate_explicit_variable_location(ctx,
+ output_explicit_locations,
+ var, prog, producer)) {
+ return;
}
}
}
const ir_variable *const back_color =
parameters.get_variable("gl_BackColor");
- cross_validate_front_and_back_color(prog, input,
+ cross_validate_front_and_back_color(ctx, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
const ir_variable *const back_color =
parameters.get_variable("gl_BackSecondaryColor");
- cross_validate_front_and_back_color(prog, input,
+ cross_validate_front_and_back_color(ctx, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else {
const glsl_type *type = get_varying_type(input, consumer->Stage);
unsigned num_elements = type->count_attribute_slots(false);
- unsigned idx = input->data.location - VARYING_SLOT_VAR0;
+ unsigned idx =
+ compute_variable_location_slot(input, consumer->Stage);
unsigned slot_limit = idx + num_elements;
+ if (!validate_explicit_variable_location(ctx,
+ input_explicit_locations,
+ input, prog, consumer)) {
+ return;
+ }
+
while (idx < slot_limit) {
- output = explicit_locations[idx][input->data.location_frac];
+ if (idx >= MAX_VARYING) {
+ linker_error(prog,
+ "Invalid location %u in %s shader\n", idx,
+ _mesa_shader_stage_to_string(consumer->Stage));
+ return;
+ }
+
+ output = output_explicit_locations[idx][input->data.location_frac].var;
- if (output == NULL ||
- input->data.location != output->data.location) {
+ if (output == NULL) {
+ /* A linker failure should only happen when there is no
+ * output declaration and there is Static Use of the
+ * declared input.
+ */
+ if (input->data.used) {
+ linker_error(prog,
+ "%s shader input `%s' with explicit location "
+ "has no matching output\n",
+ _mesa_shader_stage_to_string(consumer->Stage),
+ input->name);
+ break;
+ }
+ } else if (input->data.location != output->data.location) {
linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
*/
if (!(input->get_interface_type() &&
output->get_interface_type()))
- cross_validate_types_and_qualifiers(prog, input, output,
+ cross_validate_types_and_qualifiers(ctx, prog, input, output,
consumer->Stage,
producer->Stage);
} else {
*/
assert(!input->data.assigned);
if (input->data.used && !input->get_interface_type() &&
- !input->data.explicit_location && !prog->SeparateShader)
+ !input->data.explicit_location)
linker_error(prog,
"%s shader input `%s' "
"has no matching output in the previous stage\n",
* Demote shader inputs and outputs that are not used in other stages, and
* remove them via dead code elimination.
*/
-void
+static void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
- gl_shader *sh,
+ gl_linked_shader *sh,
enum ir_variable_mode mode)
{
if (is_separate_shader_object)
*/
if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
assert(var->data.mode != ir_var_temporary);
+
+ /* Assign zeros to demoted inputs to allow more optimizations. */
+ if (var->data.mode == ir_var_shader_in && !var->constant_value)
+ var->constant_value = ir_constant::zero(var, var->type);
+
var->data.mode = ir_var_auto;
}
}
unsigned actual_array_size;
switch (this->lowered_builtin_array_variable) {
case clip_distance:
- actual_array_size = prog->LastClipDistanceArraySize;
+ actual_array_size = prog->last_vert_prog ?
+ prog->last_vert_prog->info.clip_distance_array_size : 0;
break;
case cull_distance:
- actual_array_size = prog->LastCullDistanceArraySize;
+ actual_array_size = prog->last_vert_prog ?
+ prog->last_vert_prog->info.cull_distance_array_size : 0;
break;
case tess_level_outer:
actual_array_size = 4;
tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
struct gl_transform_feedback_info *info,
unsigned buffer, unsigned buffer_index,
- const unsigned max_outputs, bool *explicit_stride,
- bool has_xfb_qualifiers) const
+ const unsigned max_outputs,
+ BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
+ bool *explicit_stride, bool has_xfb_qualifiers,
+ const void* mem_ctx) const
{
unsigned xfb_offset = 0;
unsigned size = this->size;
unsigned location = this->location;
unsigned location_frac = this->location_frac;
unsigned num_components = this->num_components();
+
+ /* From GL_EXT_transform_feedback:
+ *
+ * " A program will fail to link if:
+ *
+ * * the total number of components to capture is greater than the
+ * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
+ * and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
+ *
+ * From GL_ARB_enhanced_layouts:
+ *
+ * " The resulting stride (implicit or explicit) must be less than or
+ * equal to the implementation-dependent constant
+ * gl_MaxTransformFeedbackInterleavedComponents."
+ */
+ if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
+ has_xfb_qualifiers) &&
+ xfb_offset + num_components >
+ ctx->Const.MaxTransformFeedbackInterleavedComponents) {
+ linker_error(prog,
+ "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
+ "limit has been exceeded.");
+ return false;
+ }
+
+ /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
+ * Page 76, (Transform Feedback Layout Qualifiers):
+ *
+ * " No aliasing in output buffers is allowed: It is a compile-time or
+ * link-time error to specify variables with overlapping transform
+ * feedback offsets."
+ */
+ const unsigned max_components =
+ ctx->Const.MaxTransformFeedbackInterleavedComponents;
+ const unsigned first_component = xfb_offset;
+ const unsigned last_component = xfb_offset + num_components - 1;
+ const unsigned start_word = BITSET_BITWORD(first_component);
+ const unsigned end_word = BITSET_BITWORD(last_component);
+ BITSET_WORD *used;
+ assert(last_component < max_components);
+
+ if (!used_components[buffer]) {
+ used_components[buffer] =
+ rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
+ }
+ used = used_components[buffer];
+
+ for (unsigned word = start_word; word <= end_word; word++) {
+ unsigned start_range = 0;
+ unsigned end_range = BITSET_WORDBITS - 1;
+
+ if (word == start_word)
+ start_range = first_component % BITSET_WORDBITS;
+
+ if (word == end_word)
+ end_range = last_component % BITSET_WORDBITS;
+
+ if (used[word] & BITSET_RANGE(start_range, end_range)) {
+ linker_error(prog,
+ "variable '%s', xfb_offset (%d) is causing aliasing.",
+ this->orig_name, xfb_offset * 4);
+ return false;
+ }
+ used[word] |= BITSET_RANGE(start_range, end_range);
+ }
+
while (num_components > 0) {
unsigned output_size = MIN2(num_components, 4 - location_frac);
assert((info->NumOutputs == 0 && max_outputs == 0) ||
return false;
}
- if ((this->offset / 4) / info->Buffers[buffer].Stride !=
- (xfb_offset - 1) / info->Buffers[buffer].Stride) {
+ if (xfb_offset > info->Buffers[buffer].Stride) {
linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
"buffer (%d)", xfb_offset * 4,
info->Buffers[buffer].Stride * 4, buffer);
info->Buffers[buffer].Stride = xfb_offset;
}
- /* From GL_EXT_transform_feedback:
- * A program will fail to link if:
- *
- * * the total number of components to capture is greater than
- * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
- * and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
- *
- * From GL_ARB_enhanced_layouts:
- *
- * "The resulting stride (implicit or explicit) must be less than or
- * equal to the implementation-dependent constant
- * gl_MaxTransformFeedbackInterleavedComponents."
- */
- if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
- has_xfb_qualifiers) &&
- info->Buffers[buffer].Stride >
- ctx->Const.MaxTransformFeedbackInterleavedComponents) {
- linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
- "limit has been exceeded.");
- return false;
- }
-
store_varying:
info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
this->orig_name);
name = "gl_TessLevelInnerMESA";
break;
}
- this->matched_candidate = (const tfeedback_candidate *)
- hash_table_find(tfeedback_candidates, name);
+ hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
+
+ this->matched_candidate = entry ?
+ (const tfeedback_candidate *) entry->data : NULL;
+
if (!this->matched_candidate) {
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
linker_error(prog, "Transform feedback varying %s undeclared.",
this->orig_name);
}
+
return this->matched_candidate;
}
+/**
+ * Force a candidate over the previously matched one. It happens when a new
+ * varying needs to be created to match the xfb declaration, for example,
+ * to fulfil an alignment criterion.
+ */
+void
+tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
+{
+   this->matched_candidate = candidate;
+
+   /* The subscript part is no longer relevant */
+   this->is_subscripted = false;
+   this->array_subscript = 0;
+}
+
/**
* Parse all the transform feedback declarations that were passed to
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
-bool
+static bool
parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
const void *mem_ctx, unsigned num_names,
char **varying_names, tfeedback_decl *decls)
* feedback of arrays would be useless otherwise.
*/
for (unsigned j = 0; j < i; ++j) {
- if (!decls[j].is_varying())
- continue;
-
- if (tfeedback_decl::is_same(decls[i], decls[j])) {
- linker_error(prog, "Transform feedback varying %s specified "
- "more than once.", varying_names[i]);
- return false;
+ if (decls[j].is_varying()) {
+ if (tfeedback_decl::is_same(decls[i], decls[j])) {
+ linker_error(prog, "Transform feedback varying %s specified "
+ "more than once.", varying_names[i]);
+ return false;
+ }
}
}
}
/**
* Store transform feedback location assignments into
- * prog->LinkedTransformFeedback based on the data stored in tfeedback_decls.
+ * prog->sh.LinkedTransformFeedback based on the data stored in
+ * tfeedback_decls.
*
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
-bool
+static bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
- tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
+ tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
+ const void *mem_ctx)
{
+ if (!prog->last_vert_prog)
+ return true;
+
/* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
* tracking the number of buffers doesn't overflow.
*/
bool separate_attribs_mode =
prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
- ralloc_free(prog->LinkedTransformFeedback.Varyings);
- ralloc_free(prog->LinkedTransformFeedback.Outputs);
-
- memset(&prog->LinkedTransformFeedback, 0,
- sizeof(prog->LinkedTransformFeedback));
+ struct gl_program *xfb_prog = prog->last_vert_prog;
+ xfb_prog->sh.LinkedTransformFeedback =
+ rzalloc(xfb_prog, struct gl_transform_feedback_info);
/* The xfb_offset qualifier does not have to be used in increasing order
* however some drivers expect to receive the list of transform feedback
* declarations in order so sort it now for convenience.
*/
- if (has_xfb_qualifiers)
+ if (has_xfb_qualifiers) {
qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
cmp_xfb_offset);
+ }
- prog->LinkedTransformFeedback.Varyings =
- rzalloc_array(prog,
- struct gl_transform_feedback_varying_info,
+ xfb_prog->sh.LinkedTransformFeedback->Varyings =
+ rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
num_tfeedback_decls);
unsigned num_outputs = 0;
num_outputs += tfeedback_decls[i].get_num_outputs();
}
- prog->LinkedTransformFeedback.Outputs =
- rzalloc_array(prog,
- struct gl_transform_feedback_output,
+ xfb_prog->sh.LinkedTransformFeedback->Outputs =
+ rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
num_outputs);
unsigned num_buffers = 0;
unsigned buffers = 0;
+ BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};
if (!has_xfb_qualifiers && separate_attribs_mode) {
/* GL_SEPARATE_ATTRIBS */
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
- if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback,
+ if (!tfeedback_decls[i].store(ctx, prog,
+ xfb_prog->sh.LinkedTransformFeedback,
num_buffers, num_buffers, num_outputs,
- NULL, has_xfb_qualifiers))
+ used_components, NULL,
+ has_xfb_qualifiers, mem_ctx))
return false;
buffers |= 1 << num_buffers;
if (has_xfb_qualifiers) {
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
if (prog->TransformFeedback.BufferStride[j]) {
- buffers |= 1 << j;
explicit_stride[j] = true;
- prog->LinkedTransformFeedback.Buffers[j].Stride =
+ xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
prog->TransformFeedback.BufferStride[j] / 4;
}
}
if (tfeedback_decls[i].is_next_buffer_separator()) {
if (!tfeedback_decls[i].store(ctx, prog,
- &prog->LinkedTransformFeedback,
+ xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
- explicit_stride, has_xfb_qualifiers))
+ used_components, explicit_stride,
+ has_xfb_qualifiers, mem_ctx))
return false;
num_buffers++;
buffer_stream_id = -1;
continue;
- } else if (tfeedback_decls[i].is_varying()) {
+ }
+
+ if (has_xfb_qualifiers) {
+ buffer = tfeedback_decls[i].get_buffer();
+ } else {
+ buffer = num_buffers;
+ }
+
+ if (tfeedback_decls[i].is_varying()) {
if (buffer_stream_id == -1) {
/* First varying writing to this buffer: remember its stream */
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
+
+ /* Only mark a buffer as active when there is a varying
+ * attached to it. This behaviour is based on a revised version
+ * of section 13.2.2 of the GL 4.6 spec.
+ */
+ buffers |= 1 << buffer;
} else if (buffer_stream_id !=
(int) tfeedback_decls[i].get_stream_id()) {
/* Varying writes to the same buffer from a different stream */
}
}
- if (has_xfb_qualifiers) {
- buffer = tfeedback_decls[i].get_buffer();
- } else {
- buffer = num_buffers;
- }
- buffers |= 1 << buffer;
-
if (!tfeedback_decls[i].store(ctx, prog,
- &prog->LinkedTransformFeedback,
+ xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
- explicit_stride, has_xfb_qualifiers))
+ used_components, explicit_stride,
+ has_xfb_qualifiers, mem_ctx))
return false;
}
}
- assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs);
+ assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
- prog->LinkedTransformFeedback.ActiveBuffers = buffers;
+ xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
return true;
}
class varying_matches
{
public:
- varying_matches(bool disable_varying_packing, bool xfb_enabled,
+ varying_matches(bool disable_varying_packing,
+ bool disable_xfb_packing,
+ bool xfb_enabled,
+ bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage);
~varying_matches();
void record(ir_variable *producer_var, ir_variable *consumer_var);
unsigned assign_locations(struct gl_shader_program *prog,
+ uint8_t components[],
uint64_t reserved_slots);
void store_locations() const;
private:
bool is_varying_packing_safe(const glsl_type *type,
- const ir_variable *var);
+ const ir_variable *var) const;
/**
* If true, this driver disables varying packing, so all varyings need to
*/
const bool disable_varying_packing;
+ /**
+ * If true, this driver disables packing for varyings used by transform
+ * feedback.
+ */
+ const bool disable_xfb_packing;
+
/**
* If true, this driver has transform feedback enabled. The transform
- * feedback code requires at least some packing be done even when varying
- * packing is disabled, fortunately where transform feedback requires
- * packing it's safe to override the disabled setting. See
+ * feedback code usually requires at least some packing be done even
+ * when varying packing is disabled, fortunately where transform feedback
+ * requires packing it's safe to override the disabled setting. See
* is_varying_packing_safe().
*/
const bool xfb_enabled;
+ const bool enhanced_layouts_enabled;
+
/**
* Enum representing the order in which varyings are packed within a
* packing class.
static packing_order_enum compute_packing_order(const ir_variable *var);
static int match_comparator(const void *x_generic, const void *y_generic);
static int xfb_comparator(const void *x_generic, const void *y_generic);
+ static int not_xfb_comparator(const void *x_generic, const void *y_generic);
/**
* Structure recording the relationship between a single producer output
} /* anonymous namespace */
varying_matches::varying_matches(bool disable_varying_packing,
+ bool disable_xfb_packing,
bool xfb_enabled,
+ bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage)
: disable_varying_packing(disable_varying_packing),
+ disable_xfb_packing(disable_xfb_packing),
xfb_enabled(xfb_enabled),
+ enhanced_layouts_enabled(enhanced_layouts_enabled),
producer_stage(producer_stage),
consumer_stage(consumer_stage)
{
*/
bool
varying_matches::is_varying_packing_safe(const glsl_type *type,
- const ir_variable *var)
+ const ir_variable *var) const
{
if (consumer_stage == MESA_SHADER_TESS_EVAL ||
consumer_stage == MESA_SHADER_TESS_CTRL ||
producer_stage == MESA_SHADER_TESS_CTRL)
return false;
- return xfb_enabled && (type->is_array() || type->is_record() ||
+ return xfb_enabled && (type->is_array() || type->is_struct() ||
type->is_matrix() || var->data.is_xfb_only);
}
(producer_var->type->contains_integer() ||
producer_var->type->contains_double());
- if (needs_flat_qualifier ||
- (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
+ if (!disable_varying_packing &&
+ (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
+ (needs_flat_qualifier ||
+ (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type varying cannot possibly affect rendering.
* Also, this variable is non-flat and is (or contains) an integer
if (producer_var) {
producer_var->data.centroid = false;
producer_var->data.sample = false;
- producer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ producer_var->data.interpolation = INTERP_MODE_FLAT;
}
if (consumer_var) {
consumer_var->data.centroid = false;
consumer_var->data.sample = false;
- consumer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ consumer_var->data.interpolation = INTERP_MODE_FLAT;
}
}
sizeof(*this->matches) * this->matches_capacity);
}
- const ir_variable *const var = (producer_var != NULL)
- ? producer_var : consumer_var;
- const gl_shader_stage stage = (producer_var != NULL)
- ? producer_stage : consumer_stage;
+ /* We must use the consumer to compute the packing class because in GL4.4+
+ * there is no guarantee interpolation qualifiers will match across stages.
+ *
+ * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
+ *
+ * "The type and presence of interpolation qualifiers of variables with
+ * the same name declared in all linked shaders for the same cross-stage
+ * interface must match, otherwise the link command will fail.
+ *
+ * When comparing an output from one stage to an input of a subsequent
+ * stage, the input and output don't match if their interpolation
+ * qualifiers (or lack thereof) are not the same."
+ *
+ * This text was also in at least revision 7 of the 4.40 spec but is no
+ * longer in revision 9 and not in the 4.50 spec.
+ */
+ const ir_variable *const var = (consumer_var != NULL)
+ ? consumer_var : producer_var;
+ const gl_shader_stage stage = (consumer_var != NULL)
+ ? consumer_stage : producer_stage;
const glsl_type *type = get_varying_type(var, stage);
+ if (producer_var && consumer_var &&
+ consumer_var->data.must_be_shader_input) {
+ producer_var->data.must_be_shader_input = 1;
+ }
+
this->matches[this->num_matches].packing_class
= this->compute_packing_class(var);
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
- if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) {
+ if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
+ (this->disable_xfb_packing && var->data.is_xfb) ||
+ var->data.must_be_shader_input) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
} else {
this->matches[this->num_matches].num_components
= type->component_slots();
}
+
this->matches[this->num_matches].producer_var = producer_var;
this->matches[this->num_matches].consumer_var = consumer_var;
this->num_matches++;
/**
* Choose locations for all of the variable matches that were previously
* passed to varying_matches::record().
+ * \param components returns array[slot] of number of components used
+ * per slot (1, 2, 3 or 4)
+ * \param reserved_slots bitmask indicating which varying slots are already
+ * allocated
+ * \return number of slots (4-element vectors) allocated
*/
unsigned
varying_matches::assign_locations(struct gl_shader_program *prog,
+ uint8_t components[],
uint64_t reserved_slots)
{
/* If packing has been disabled then we cannot safely sort the varyings by
* When packing is disabled the sort orders varyings used by transform
* feedback first, but also depends on *undefined behaviour* of qsort to
* reverse the order of the varyings. See: xfb_comparator().
+ *
+ * If packing is only disabled for xfb varyings (mutually exclusive with
+ * disable_varying_packing), we then group varyings depending on if they
+ * are captured for transform feedback. The same *undefined behaviour* is
+ * taken advantage of.
*/
- if (!this->disable_varying_packing) {
- /* Sort varying matches into an order that makes them easy to pack. */
- qsort(this->matches, this->num_matches, sizeof(*this->matches),
- &varying_matches::match_comparator);
- } else {
+ if (this->disable_varying_packing) {
/* Only sort varyings that are only used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::xfb_comparator);
+ } else if (this->disable_xfb_packing) {
+ /* Only sort varyings that are NOT used by transform feedback. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::not_xfb_comparator);
+ } else {
+ /* Sort varying matches into an order that makes them easy to pack. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::match_comparator);
}
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
+ bool previous_var_xfb = false;
bool previous_var_xfb_only = false;
+ unsigned previous_packing_class = ~0u;
+
+ /* For transform feedback separate mode, we know the number of attributes
+ * is <= the number of buffers. So packing isn't critical. In fact,
+ * packing vec3 attributes can cause trouble because splitting a vec3
+ * effectively creates an additional transform feedback output. The
+ * extra TFB output may exceed device driver limits.
+ */
+ const bool dont_pack_vec3 =
+ (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
+ prog->TransformFeedback.NumVarying > 0);
for (unsigned i = 0; i < this->num_matches; i++) {
unsigned *location = &generic_location;
-
const ir_variable *var;
const glsl_type *type;
bool is_vertex_input = false;
+
if (matches[i].consumer_var) {
var = matches[i].consumer_var;
type = get_varying_type(var, consumer_stage);
* class than the previous one, and we're not already on a slot
* boundary.
*
+ * Also advance if varying packing is disabled for transform feedback,
+ * and previous or current varying is used for transform feedback.
+ *
* Also advance to the next slot if packing is disabled. This makes sure
* we don't assign varyings the same locations which is possible
* because we still pack individual arrays, records and matrices even
* we can pack varyings together that are only used for transform
* feedback.
*/
- if ((this->disable_varying_packing &&
+ if (var->data.must_be_shader_input ||
+ (this->disable_xfb_packing &&
+ (previous_var_xfb || var->data.is_xfb)) ||
+ (this->disable_varying_packing &&
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
- (i > 0 && this->matches[i - 1].packing_class
- != this->matches[i].packing_class )) {
+ (previous_packing_class != this->matches[i].packing_class) ||
+ (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
+ dont_pack_vec3)) {
*location = ALIGN(*location, 4);
}
+ previous_var_xfb = var->data.is_xfb;
previous_var_xfb_only = var->data.is_xfb_only;
+ previous_packing_class = this->matches[i].packing_class;
- unsigned num_elements = type->count_attribute_slots(is_vertex_input);
- unsigned slot_end;
- if (this->disable_varying_packing &&
- !is_varying_packing_safe(type, var))
- slot_end = 4;
- else
- slot_end = type->without_array()->vector_elements;
- slot_end += *location - 1;
+ /* The number of components taken up by this variable. For vertex shader
+ * inputs, we use the number of slots * 4, as they have different
+ * counting rules.
+ */
+ unsigned num_components = is_vertex_input ?
+ type->count_attribute_slots(is_vertex_input) * 4 :
+ this->matches[i].num_components;
+
+ /* The last slot for this variable, inclusive. */
+ unsigned slot_end = *location + num_components - 1;
/* FIXME: We could be smarter in the below code and loop back over
* trying to fill any locations that we skipped because we couldn't pack
* hit the linking error if we run out of room and suggest they use
* explicit locations.
*/
- for (unsigned j = 0; j < num_elements; j++) {
- while ((slot_end < MAX_VARYING * 4u) &&
- ((reserved_slots & (UINT64_C(1) << *location / 4u) ||
- (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) {
+ while (slot_end < MAX_VARYING * 4u) {
+ const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
+ const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
- *location = ALIGN(*location + 1, 4);
- slot_end = *location;
+ assert(slots > 0);
- /* reset the counter and try again */
- j = 0;
+ if ((reserved_slots & slot_mask) == 0) {
+ break;
}
- /* Increase the slot to make sure there is enough room for next
- * array element.
- */
- if (this->disable_varying_packing &&
- !is_varying_packing_safe(type, var))
- slot_end += 4;
- else
- slot_end += type->without_array()->vector_elements;
+ *location = ALIGN(*location + 1, 4);
+ slot_end = *location + num_components - 1;
}
- if (!var->data.patch && *location >= MAX_VARYING * 4u) {
+ if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
linker_error(prog, "insufficient contiguous locations available for "
"%s it is possible an array or struct could not be "
"packed between varyings with explicit locations. Try "
var->name);
}
+ if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
+ for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
+ components[j] = 4;
+ components[slot_end / 4u] = (slot_end & 3) + 1;
+ }
+
this->matches[i].generic_location = *location;
- *location += this->matches[i].num_components;
+ *location = slot_end + 1;
}
return (generic_location + 3) / 4;
void
varying_matches::store_locations() const
{
+ /* Check if location needs to be packed with lower_packed_varyings() or if
+ * we can just use ARB_enhanced_layouts packing.
+ */
+ bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
+ const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
+
for (unsigned i = 0; i < this->num_matches; i++) {
ir_variable *producer_var = this->matches[i].producer_var;
ir_variable *consumer_var = this->matches[i].consumer_var;
consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
consumer_var->data.location_frac = offset;
}
+
+ /* Find locations suitable for native packing via
+ * ARB_enhanced_layouts.
+ */
+ if (producer_var && consumer_var) {
+ if (enhanced_layouts_enabled) {
+ const glsl_type *type =
+ get_varying_type(producer_var, producer_stage);
+ if (type->is_array() || type->is_matrix() || type->is_struct() ||
+ type->is_64bit()) {
+ unsigned comp_slots = type->component_slots() + offset;
+ unsigned slots = comp_slots / 4;
+ if (comp_slots % 4)
+ slots += 1;
+
+ for (unsigned j = 0; j < slots; j++) {
+ pack_loc[slot + j] = true;
+ }
+ } else if (offset + type->vector_elements > 4) {
+ pack_loc[slot] = true;
+ pack_loc[slot + 1] = true;
+ } else {
+ loc_type[slot][offset] = type;
+ }
+ }
+ }
+ }
+
+ /* Attempt to use ARB_enhanced_layouts for more efficient packing if
+ * suitable.
+ */
+ if (enhanced_layouts_enabled) {
+ for (unsigned i = 0; i < this->num_matches; i++) {
+ ir_variable *producer_var = this->matches[i].producer_var;
+ ir_variable *consumer_var = this->matches[i].consumer_var;
+ unsigned generic_location = this->matches[i].generic_location;
+ unsigned slot = generic_location / 4;
+
+ if (pack_loc[slot] || !producer_var || !consumer_var)
+ continue;
+
+ const glsl_type *type =
+ get_varying_type(producer_var, producer_stage);
+ bool type_match = true;
+ for (unsigned j = 0; j < 4; j++) {
+ if (loc_type[slot][j]) {
+ if (type->base_type != loc_type[slot][j]->base_type)
+ type_match = false;
+ }
+ }
+
+ if (type_match) {
+ producer_var->data.explicit_location = 1;
+ consumer_var->data.explicit_location = 1;
+ producer_var->data.explicit_component = 1;
+ consumer_var->data.explicit_component = 1;
+ }
+ }
}
}
*
* Therefore, the packing class depends only on the interpolation type.
*/
- unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
- (var->data.patch << 2);
- packing_class *= 4;
- packing_class += var->data.interpolation;
+ const unsigned interp = var->is_interpolation_flat()
+ ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
+
+ assert(interp < (1 << 3));
+
+ const unsigned packing_class = (interp << 0) |
+ (var->data.centroid << 3) |
+ (var->data.sample << 4) |
+ (var->data.patch << 5) |
+ (var->data.must_be_shader_input << 6);
+
return packing_class;
}
{
const glsl_type *element_type = var->type;
- while (element_type->base_type == GLSL_TYPE_ARRAY) {
+ while (element_type->is_array()) {
element_type = element_type->fields.array;
}
const match *x = (const match *) x_generic;
if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
- return match_comparator(x_generic, y_generic);
+ return match_comparator(x_generic, y_generic);
+
+ /* FIXME: When the comparator returns 0 it means the elements being
+ * compared are equivalent. However the qsort documentation says:
+ *
+ * "The order of equivalent elements is undefined."
+ *
+ * In practice the sort ends up reversing the order of the varyings which
+ * means locations are also assigned in this reversed order and happens to
+ * be what we want. This is also what's happening in
+ * varying_matches::match_comparator().
+ */
+ return 0;
+}
+
+
+/**
+ * Comparison function passed to qsort() to sort varyings NOT used by
+ * transform feedback when packing of xfb varyings is disabled.
+ */
+int
+varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
+{
+ const match *x = (const match *) x_generic;
+
+ if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
+ return match_comparator(x_generic, y_generic);
/* FIXME: When the comparator returns 0 it means the elements being
* compared are equivalent. However the qsort documentation says:
{
public:
tfeedback_candidate_generator(void *mem_ctx,
- hash_table *tfeedback_candidates)
+ hash_table *tfeedback_candidates,
+ gl_shader_stage stage)
: mem_ctx(mem_ctx),
tfeedback_candidates(tfeedback_candidates),
+ stage(stage),
toplevel_var(NULL),
varying_floats(0)
{
{
/* All named varying interface blocks should be flattened by now */
assert(!var->is_interface_instance());
+ assert(var->data.mode == ir_var_shader_out);
this->toplevel_var = var;
this->varying_floats = 0;
- program_resource_visitor::process(var);
+ const glsl_type *t =
+ var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
+ if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
+ assert(t->is_array());
+ t = t->fields.array;
+ }
+ program_resource_visitor::process(var, t, false);
}
private:
virtual void visit_field(const glsl_type *type, const char *name,
- bool row_major)
+ bool /* row_major */,
+ const glsl_type * /* record_type */,
+ const enum glsl_interface_packing,
+ bool /* last_field */)
{
- assert(!type->without_array()->is_record());
+ assert(!type->without_array()->is_struct());
assert(!type->without_array()->is_interface());
- (void) row_major;
-
tfeedback_candidate *candidate
= rzalloc(this->mem_ctx, tfeedback_candidate);
candidate->toplevel_var = this->toplevel_var;
candidate->type = type;
candidate->offset = this->varying_floats;
- hash_table_insert(this->tfeedback_candidates, candidate,
- ralloc_strdup(this->mem_ctx, name));
+ _mesa_hash_table_insert(this->tfeedback_candidates,
+ ralloc_strdup(this->mem_ctx, name),
+ candidate);
this->varying_floats += type->component_slots();
}
*/
hash_table * const tfeedback_candidates;
+ gl_shader_stage stage;
+
/**
* Pointer to the toplevel variable that is being traversed.
*/
ralloc_asprintf(mem_ctx, "%s.%s",
input_var->get_interface_type()->without_array()->name,
input_var->name);
- hash_table_insert(consumer_interface_inputs, input_var,
- iface_field_name);
+ _mesa_hash_table_insert(consumer_interface_inputs,
+ iface_field_name, input_var);
} else {
- hash_table_insert(consumer_inputs, input_var,
- ralloc_strdup(mem_ctx, input_var->name));
+ _mesa_hash_table_insert(consumer_inputs,
+ ralloc_strdup(mem_ctx, input_var->name),
+ input_var);
}
}
}
ralloc_asprintf(mem_ctx, "%s.%s",
output_var->get_interface_type()->without_array()->name,
output_var->name);
- input_var =
- (ir_variable *) hash_table_find(consumer_interface_inputs,
- iface_field_name);
+ hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
+ input_var = entry ? (ir_variable *) entry->data : NULL;
} else {
- input_var =
- (ir_variable *) hash_table_find(consumer_inputs, output_var->name);
+ hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
+ input_var = entry ? (ir_variable *) entry->data : NULL;
}
return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
* with a max of MAX_VARYING.
*/
static uint64_t
-reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
+reserved_varying_slot(struct gl_linked_shader *stage,
+ ir_variable_mode io_mode)
{
assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
/* Avoid an overflow of the returned value */
var_slot = var->data.location - VARYING_SLOT_VAR0;
unsigned num_elements = get_varying_type(var, stage->Stage)
- ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
+ ->count_attribute_slots(io_mode == ir_var_shader_in &&
+ stage->Stage == MESA_SHADER_VERTEX);
for (unsigned i = 0; i < num_elements; i++) {
if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
slots |= UINT64_C(1) << var_slot;
* be NULL. In this case, varying locations are assigned solely based on the
* requirements of transform feedback.
*/
-bool
+static bool
assign_varying_locations(struct gl_context *ctx,
void *mem_ctx,
struct gl_shader_program *prog,
- gl_shader *producer, gl_shader *consumer,
+ gl_linked_shader *producer,
+ gl_linked_shader *consumer,
unsigned num_tfeedback_decls,
- tfeedback_decl *tfeedback_decls)
+ tfeedback_decl *tfeedback_decls,
+ const uint64_t reserved_slots)
{
/* Tessellation shaders treat inputs and outputs as shared memory and can
* access inputs and outputs of other invocations.
/* Transform feedback code assumes varying arrays are packed, so if the
* driver has disabled varying packing, make sure to at least enable
- * packing required by transform feedback.
+ * packing required by transform feedback. See below for exception.
*/
bool xfb_enabled =
ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
- /* Disable varying packing for GL 4.4+ as there is no guarantee
- * that interpolation qualifiers will match between shaders in these
- * versions. We also disable packing on outward facing interfaces for
- * SSO because in ES we need to retain the unpacked varying information
- * for draw time validation. For desktop GL we could allow packing for
- * versions < 4.4 but it's just safer not to do packing.
+ /* Some drivers actually requires packing to be explicitly disabled
+ * for varyings used by transform feedback.
+ */
+ bool disable_xfb_packing =
+ ctx->Const.DisableTransformFeedbackPacking;
+
+ /* Disable packing on outward facing interfaces for SSO because in ES we
+ * need to retain the unpacked varying information for draw time
+ * validation.
*
* Packing is still enabled on individual arrays, structs, and matrices as
* these are required by the transform feedback code and it is still safe
* to do so. We also enable packing when a varying is only used for
* transform feedback and its not a SSO.
- *
- * Varying packing currently only packs together varyings with matching
- * interpolation qualifiers as the backends assume all packed components
- * are to be processed in the same way. Therefore we cannot do packing in
- * these versions of GL without the risk of mismatching interfaces.
- *
- * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
- *
- * "The type and presence of interpolation qualifiers of variables with
- * the same name declared in all linked shaders for the same cross-stage
- * interface must match, otherwise the link command will fail.
- *
- * When comparing an output from one stage to an input of a subsequent
- * stage, the input and output don't match if their interpolation
- * qualifiers (or lack thereof) are not the same."
- *
- * This text was also in at least revison 7 of the 4.40 spec but is no
- * longer in revision 9 and not in the 4.50 spec.
*/
bool disable_varying_packing =
ctx->Const.DisableVaryingPacking || unpackable_tess;
- if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
- (prog->SeparateShader && (producer == NULL || consumer == NULL)))
+ if (prog->SeparateShader && (producer == NULL || consumer == NULL))
disable_varying_packing = true;
- varying_matches matches(disable_varying_packing, xfb_enabled,
- producer ? producer->Stage : (gl_shader_stage)-1,
- consumer ? consumer->Stage : (gl_shader_stage)-1);
- hash_table *tfeedback_candidates
- = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
- hash_table *consumer_inputs
- = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
- hash_table *consumer_interface_inputs
- = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
+ varying_matches matches(disable_varying_packing,
+ disable_xfb_packing,
+ xfb_enabled,
+ ctx->Extensions.ARB_enhanced_layouts,
+ producer ? producer->Stage : MESA_SHADER_NONE,
+ consumer ? consumer->Stage : MESA_SHADER_NONE);
+ void *hash_table_ctx = ralloc_context(NULL);
+ hash_table *tfeedback_candidates =
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
+ _mesa_key_string_equal);
+ hash_table *consumer_inputs =
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
+ _mesa_key_string_equal);
+ hash_table *consumer_interface_inputs =
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
+ _mesa_key_string_equal);
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
NULL,
};
producer->Stage == MESA_SHADER_GEOMETRY));
if (num_tfeedback_decls > 0) {
- tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
- g.process(output_var);
+ tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
+ /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
+ * ("Vertex Shader Variables / Output Variables")
+ *
+ * "Each program object can specify a set of output variables from
+ * one shader to be recorded in transform feedback mode (see
+ * section 13.3). The variables that can be recorded are those
+ * emitted by the first active shader, in order, from the
+ * following list:
+ *
+ * * geometry shader
+ * * tessellation evaluation shader
+ * * tessellation control shader
+ * * vertex shader"
+ *
+ * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
+ * Variables / Output Variables") the tessellation control
+ * shader is not included in the stages list.
+ */
+ if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
+ g.process(output_var);
+ }
}
ir_variable *const input_var =
* within a patch and can be used as shared memory.
*/
if (input_var || (prog->SeparateShader && consumer == NULL) ||
- producer->Type == GL_TESS_CONTROL_SHADER) {
+ producer->Stage == MESA_SHADER_TESS_CTRL) {
matches.record(output_var, input_var);
}
linker_error(prog, "output %s is assigned to stream=%d but "
"is linked to an input, which requires stream=0",
output_var->name, output_var->data.stream);
+ ralloc_free(hash_table_ctx);
return false;
}
}
*/
foreach_in_list(ir_instruction, node, consumer->ir) {
ir_variable *const input_var = node->as_variable();
-
- if (input_var == NULL || input_var->data.mode != ir_var_shader_in)
- continue;
-
- matches.record(NULL, input_var);
+ if (input_var && input_var->data.mode == ir_var_shader_in) {
+ matches.record(NULL, input_var);
+ }
}
}
= tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
if (matched_candidate == NULL) {
- hash_table_dtor(tfeedback_candidates);
- hash_table_dtor(consumer_inputs);
- hash_table_dtor(consumer_interface_inputs);
+ ralloc_free(hash_table_ctx);
return false;
}
+ /* There are two situations where a new output varying is needed:
+ *
+ * - If varying packing is disabled for xfb and the current declaration
+ * is not aligned within the top level varying (e.g. vec3_arr[1]).
+ *
+ * - If a builtin variable needs to be copied to a new variable
+ * before its content is modified by another lowering pass (e.g.
+ * \c gl_Position is transformed by \c nir_lower_viewport_transform).
+ */
+ const unsigned dmul =
+ matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
+ const bool lowered =
+ (disable_xfb_packing &&
+ !tfeedback_decls[i].is_aligned(dmul, matched_candidate->offset)) ||
+ (matched_candidate->toplevel_var->data.explicit_location &&
+ matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
+ (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
+ BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
+
+ if (lowered) {
+ ir_variable *new_var;
+ tfeedback_candidate *new_candidate = NULL;
+
+ new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
+ if (new_var == NULL) {
+ ralloc_free(hash_table_ctx);
+ return false;
+ }
+
+ /* Create new candidate and replace matched_candidate */
+ new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
+ new_candidate->toplevel_var = new_var;
+ new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
+ new_candidate->type = new_var->type;
+ new_candidate->offset = 0;
+ _mesa_hash_table_insert(tfeedback_candidates,
+ ralloc_strdup(mem_ctx, new_var->name),
+ new_candidate);
+
+ tfeedback_decls[i].set_lowered_candidate(new_candidate);
+ matched_candidate = new_candidate;
+ }
+
+ /* Mark as xfb varying */
+ matched_candidate->toplevel_var->data.is_xfb = 1;
+
+ /* Mark xfb varyings as always active */
+ matched_candidate->toplevel_var->data.always_active_io = 1;
+
+ /* Mark any corresponding inputs as always active also. We must do this
+ * because we have a NIR pass that lowers vectors to scalars and another
+ * that removes unused varyings.
+ * We don't split varyings marked as always active because there is no
+ * point in doing so. This means we need to mark both sides of the
+ * interface as always active otherwise we will have a mismatch and
+ * start removing things we shouldn't.
+ */
+ ir_variable *const input_var =
+ linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
+ consumer_inputs,
+ consumer_interface_inputs,
+ consumer_inputs_with_locations);
+ if (input_var) {
+ input_var->data.is_xfb = 1;
+ input_var->data.always_active_io = 1;
+ }
+
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
matched_candidate->toplevel_var->data.is_xfb_only = 1;
matches.record(matched_candidate->toplevel_var, NULL);
}
}
- const uint64_t reserved_slots =
- reserved_varying_slot(producer, ir_var_shader_out) |
- reserved_varying_slot(consumer, ir_var_shader_in);
-
- const unsigned slots_used = matches.assign_locations(prog, reserved_slots);
+ uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
+ const unsigned slots_used = matches.assign_locations(
+ prog, components, reserved_slots);
matches.store_locations();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
- if (!tfeedback_decls[i].is_varying())
- continue;
-
- if (!tfeedback_decls[i].assign_location(ctx, prog)) {
- hash_table_dtor(tfeedback_candidates);
- hash_table_dtor(consumer_inputs);
- hash_table_dtor(consumer_interface_inputs);
- return false;
+ if (tfeedback_decls[i].is_varying()) {
+ if (!tfeedback_decls[i].assign_location(ctx, prog)) {
+ ralloc_free(hash_table_ctx);
+ return false;
+ }
}
}
-
- hash_table_dtor(tfeedback_candidates);
- hash_table_dtor(consumer_inputs);
- hash_table_dtor(consumer_interface_inputs);
+ ralloc_free(hash_table_ctx);
if (consumer && producer) {
foreach_in_list(ir_instruction, node, consumer->ir) {
if (var && var->data.mode == ir_var_shader_in &&
var->data.is_unmatched_generic_inout) {
- if (!prog->IsES && prog->Version <= 120) {
+ if (!prog->IsES && prog->data->Version <= 120) {
/* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
*
* Only those varying variables used (i.e. read) in
}
if (producer) {
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
+ lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
0, producer, disable_varying_packing,
- xfb_enabled);
+ disable_xfb_packing, xfb_enabled);
}
if (consumer) {
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
- consumer_vertices, consumer,
- disable_varying_packing, xfb_enabled);
+ lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
+ consumer_vertices, consumer, disable_varying_packing,
+ disable_xfb_packing, xfb_enabled);
}
return true;
}
-bool
+static bool
check_against_output_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
- gl_shader *producer)
+ gl_linked_shader *producer,
+ unsigned num_explicit_locations)
{
- unsigned output_vectors = 0;
+ unsigned output_vectors = num_explicit_locations;
foreach_in_list(ir_instruction, node, producer->ir) {
ir_variable *const var = node->as_variable();
- if (var && var->data.mode == ir_var_shader_out &&
+ if (var && !var->data.explicit_location &&
+ var->data.mode == ir_var_shader_out &&
var_counts_against_varying_limit(producer->Stage, var)) {
/* outputs for fragment shader can't be doubles */
output_vectors += var->type->count_attribute_slots(false);
return true;
}
-bool
+static bool
check_against_input_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
- gl_shader *consumer)
+ gl_linked_shader *consumer,
+ unsigned num_explicit_locations)
{
- unsigned input_vectors = 0;
+ unsigned input_vectors = num_explicit_locations;
foreach_in_list(ir_instruction, node, consumer->ir) {
ir_variable *const var = node->as_variable();
- if (var && var->data.mode == ir_var_shader_in &&
+ if (var && !var->data.explicit_location &&
+ var->data.mode == ir_var_shader_in &&
var_counts_against_varying_limit(consumer->Stage, var)) {
/* vertex inputs aren't varying counted */
input_vectors += var->type->count_attribute_slots(false);
return true;
}
+
+bool
+link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
+ struct gl_context *ctx, void *mem_ctx)
+{
+ bool has_xfb_qualifiers = false;
+ unsigned num_tfeedback_decls = 0;
+ char **varying_names = NULL;
+ tfeedback_decl *tfeedback_decls = NULL;
+
+ /* From the ARB_enhanced_layouts spec:
+ *
+ * "If the shader used to record output variables for transform feedback
+ * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
+ * qualifiers, the values specified by TransformFeedbackVaryings are
+ * ignored, and the set of variables captured for transform feedback is
+ * instead derived from the specified layout qualifiers."
+ */
+ for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
+ /* Find last stage before fragment shader */
+ if (prog->_LinkedShaders[i]) {
+ has_xfb_qualifiers =
+ process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
+ prog, &num_tfeedback_decls,
+ &varying_names);
+ break;
+ }
+ }
+
+ if (!has_xfb_qualifiers) {
+ num_tfeedback_decls = prog->TransformFeedback.NumVarying;
+ varying_names = prog->TransformFeedback.VaryingNames;
+ }
+
+ if (num_tfeedback_decls != 0) {
+ /* From GL_EXT_transform_feedback:
+ * A program will fail to link if:
+ *
+ * * the <count> specified by TransformFeedbackVaryingsEXT is
+ * non-zero, but the program object has no vertex or geometry
+ * shader;
+ */
+ if (first >= MESA_SHADER_FRAGMENT) {
+ linker_error(prog, "Transform feedback varyings specified, but "
+ "no vertex, tessellation, or geometry shader is "
+ "present.\n");
+ return false;
+ }
+
+ tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
+ num_tfeedback_decls);
+ if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
+ varying_names, tfeedback_decls))
+ return false;
+ }
+
+ /* If there is no fragment shader we need to set transform feedback.
+ *
+ * For SSO we also need to assign output locations. We assign them here
+ * because we need to do it for both single stage programs and multi stage
+ * programs.
+ */
+ if (last < MESA_SHADER_FRAGMENT &&
+ (num_tfeedback_decls != 0 || prog->SeparateShader)) {
+ const uint64_t reserved_out_slots =
+ reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
+ if (!assign_varying_locations(ctx, mem_ctx, prog,
+ prog->_LinkedShaders[last], NULL,
+ num_tfeedback_decls, tfeedback_decls,
+ reserved_out_slots))
+ return false;
+ }
+
+ if (last <= MESA_SHADER_FRAGMENT) {
+ /* Remove unused varyings from the first/last stage unless SSO */
+ remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
+ prog->_LinkedShaders[first],
+ ir_var_shader_in);
+ remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
+ prog->_LinkedShaders[last],
+ ir_var_shader_out);
+
+ /* If the program is made up of only a single stage */
+ if (first == last) {
+ gl_linked_shader *const sh = prog->_LinkedShaders[last];
+
+ do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
+ do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
+ tfeedback_decls);
+
+ if (prog->SeparateShader) {
+ const uint64_t reserved_slots =
+ reserved_varying_slot(sh, ir_var_shader_in);
+
+ /* Assign input locations for SSO, output locations are already
+ * assigned.
+ */
+ if (!assign_varying_locations(ctx, mem_ctx, prog,
+ NULL /* producer */,
+ sh /* consumer */,
+ 0 /* num_tfeedback_decls */,
+ NULL /* tfeedback_decls */,
+ reserved_slots))
+ return false;
+ }
+ } else {
+ /* Linking the stages in the opposite order (from fragment to vertex)
+ * ensures that inter-shader outputs written to in an earlier stage
+ * are eliminated if they are (transitively) not used in a later
+ * stage.
+ */
+ int next = last;
+ for (int i = next - 1; i >= 0; i--) {
+ if (prog->_LinkedShaders[i] == NULL && i != 0)
+ continue;
+
+ gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
+ gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
+
+ const uint64_t reserved_out_slots =
+ reserved_varying_slot(sh_i, ir_var_shader_out);
+ const uint64_t reserved_in_slots =
+ reserved_varying_slot(sh_next, ir_var_shader_in);
+
+ do_dead_builtin_varyings(ctx, sh_i, sh_next,
+ next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+ tfeedback_decls);
+
+ if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
+ next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+ tfeedback_decls,
+ reserved_out_slots | reserved_in_slots))
+ return false;
+
+ /* This must be done after all dead varyings are eliminated. */
+ if (sh_i != NULL) {
+ unsigned slots_used = util_bitcount64(reserved_out_slots);
+ if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
+ return false;
+ }
+ }
+
+ unsigned slots_used = util_bitcount64(reserved_in_slots);
+ if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
+ return false;
+
+ next = i;
+ }
+ }
+ }
+
+ if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
+ has_xfb_qualifiers, mem_ctx))
+ return false;
+
+ return true;
+}