/* Structures across shader stages can have different name
* and considered to match in type if and only if structure
* members match in name, type, qualification, and declaration
- * order.
+ * order. The precision doesn't need to match.
*/
- if (!output->type->record_compare(type_to_match, false, true)) {
+ if (!output->type->record_compare(type_to_match,
+ false, /* match_name */
+ true, /* match_locations */
+ false /* match_precision */)) {
linker_error(prog,
"%s shader output `%s' declared as struct `%s', "
"doesn't match in type with %s shader input "
/**
* Validate explicit locations for the inputs to the first stage and the
- * outputs of the last stage in an SSO program (everything in between is
- * validated in cross_validate_outputs_to_inputs).
+ * outputs of the last stage in a program, if those are not the VS and FS
+ * shaders.
*/
void
-validate_sso_explicit_locations(struct gl_context *ctx,
- struct gl_shader_program *prog,
- gl_shader_stage first_stage,
- gl_shader_stage last_stage)
+validate_first_and_last_interface_explicit_locations(struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ gl_shader_stage first_stage,
+ gl_shader_stage last_stage)
{
- assert(prog->SeparateShader);
-
/* VS inputs and FS outputs are validated in
* assign_attribute_or_color_locations()
*/
gl_linked_shader *consumer)
{
glsl_symbol_table parameters;
- struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = { 0 };
- struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = { 0 };
+ struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
+ struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};
/* Find all shader outputs in the "producer" stage.
*/
tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
struct gl_transform_feedback_info *info,
unsigned buffer, unsigned buffer_index,
- const unsigned max_outputs, bool *explicit_stride,
- bool has_xfb_qualifiers) const
+ const unsigned max_outputs,
+ BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
+ bool *explicit_stride, bool has_xfb_qualifiers,
+ const void* mem_ctx) const
{
unsigned xfb_offset = 0;
unsigned size = this->size;
unsigned location = this->location;
unsigned location_frac = this->location_frac;
unsigned num_components = this->num_components();
+
+ /* From GL_EXT_transform_feedback:
+ *
+ * " A program will fail to link if:
+ *
+ * * the total number of components to capture is greater than the
+ * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
+ * and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
+ *
+ * From GL_ARB_enhanced_layouts:
+ *
+ * " The resulting stride (implicit or explicit) must be less than or
+ * equal to the implementation-dependent constant
+ * gl_MaxTransformFeedbackInterleavedComponents."
+ */
+ if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
+ has_xfb_qualifiers) &&
+ xfb_offset + num_components >
+ ctx->Const.MaxTransformFeedbackInterleavedComponents) {
+ linker_error(prog,
+ "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
+ "limit has been exceeded.");
+ return false;
+ }
+
+ /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
+ * Page 76, (Transform Feedback Layout Qualifiers):
+ *
+ * " No aliasing in output buffers is allowed: It is a compile-time or
+ * link-time error to specify variables with overlapping transform
+ * feedback offsets."
+ */
+ const unsigned max_components =
+ ctx->Const.MaxTransformFeedbackInterleavedComponents;
+ const unsigned first_component = xfb_offset;
+ const unsigned last_component = xfb_offset + num_components - 1;
+ const unsigned start_word = BITSET_BITWORD(first_component);
+ const unsigned end_word = BITSET_BITWORD(last_component);
+ BITSET_WORD *used;
+ assert(last_component < max_components);
+
+ if (!used_components[buffer]) {
+ used_components[buffer] =
+ rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
+ }
+ used = used_components[buffer];
+
+ for (unsigned word = start_word; word <= end_word; word++) {
+ unsigned start_range = 0;
+ unsigned end_range = BITSET_WORDBITS - 1;
+
+ if (word == start_word)
+ start_range = first_component % BITSET_WORDBITS;
+
+ if (word == end_word)
+ end_range = last_component % BITSET_WORDBITS;
+
+ if (used[word] & BITSET_RANGE(start_range, end_range)) {
+ linker_error(prog,
+ "variable '%s', xfb_offset (%d) is causing aliasing.",
+ this->orig_name, xfb_offset * 4);
+ return false;
+ }
+ used[word] |= BITSET_RANGE(start_range, end_range);
+ }
+
while (num_components > 0) {
unsigned output_size = MIN2(num_components, 4 - location_frac);
assert((info->NumOutputs == 0 && max_outputs == 0) ||
info->Buffers[buffer].Stride = xfb_offset;
}
- /* From GL_EXT_transform_feedback:
- * A program will fail to link if:
- *
- * * the total number of components to capture is greater than
- * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
- * and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
- *
- * From GL_ARB_enhanced_layouts:
- *
- * "The resulting stride (implicit or explicit) must be less than or
- * equal to the implementation-dependent constant
- * gl_MaxTransformFeedbackInterleavedComponents."
- */
- if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
- has_xfb_qualifiers) &&
- info->Buffers[buffer].Stride >
- ctx->Const.MaxTransformFeedbackInterleavedComponents) {
- linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
- "limit has been exceeded.");
- return false;
- }
-
store_varying:
info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
this->orig_name);
return this->matched_candidate;
}
+/**
+ * Force \c candidate over the previously matched one. This happens when a
+ * new varying needs to be created to match the xfb declaration, for example
+ * to fulfil an alignment criterion.
+ *
+ * Besides storing the new candidate in matched_candidate, the subscript
+ * state of this declaration (is_subscripted and array_subscript) is reset.
+ *
+ * \param candidate  the candidate that replaces the previously matched one.
+ */
+void
+tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
+{
+ this->matched_candidate = candidate;
+
+ /* The subscript part is no longer relevant once the candidate has been
+ * replaced, so clear it.
+ */
+ this->is_subscripted = false;
+ this->array_subscript = 0;
+}
+
/**
* Parse all the transform feedback declarations that were passed to
static bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
- tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
+ tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
+ const void *mem_ctx)
{
if (!prog->last_vert_prog)
return true;
unsigned num_buffers = 0;
unsigned buffers = 0;
+ BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};
if (!has_xfb_qualifiers && separate_attribs_mode) {
/* GL_SEPARATE_ATTRIBS */
if (!tfeedback_decls[i].store(ctx, prog,
xfb_prog->sh.LinkedTransformFeedback,
num_buffers, num_buffers, num_outputs,
- NULL, has_xfb_qualifiers))
+ used_components, NULL,
+ has_xfb_qualifiers, mem_ctx))
return false;
buffers |= 1 << num_buffers;
if (!tfeedback_decls[i].store(ctx, prog,
xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
- explicit_stride, has_xfb_qualifiers))
+ used_components, explicit_stride,
+ has_xfb_qualifiers, mem_ctx))
return false;
num_buffers++;
buffer_stream_id = -1;
if (!tfeedback_decls[i].store(ctx, prog,
xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
- explicit_stride, has_xfb_qualifiers))
+ used_components, explicit_stride,
+ has_xfb_qualifiers, mem_ctx))
return false;
}
}
class varying_matches
{
public:
- varying_matches(bool disable_varying_packing, bool xfb_enabled,
+ varying_matches(bool disable_varying_packing,
+ bool disable_xfb_packing,
+ bool xfb_enabled,
bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage);
*/
const bool disable_varying_packing;
+ /**
+ * If true, this driver disables packing for varyings used by transform
+ * feedback.
+ */
+ const bool disable_xfb_packing;
+
/**
* If true, this driver has transform feedback enabled. The transform
- * feedback code requires at least some packing be done even when varying
- * packing is disabled, fortunately where transform feedback requires
- * packing it's safe to override the disabled setting. See
+ * feedback code usually requires at least some packing be done even
+ * when varying packing is disabled, fortunately where transform feedback
+ * requires packing it's safe to override the disabled setting. See
* is_varying_packing_safe().
*/
const bool xfb_enabled;
static packing_order_enum compute_packing_order(const ir_variable *var);
static int match_comparator(const void *x_generic, const void *y_generic);
static int xfb_comparator(const void *x_generic, const void *y_generic);
+ static int not_xfb_comparator(const void *x_generic, const void *y_generic);
/**
* Structure recording the relationship between a single producer output
} /* anonymous namespace */
varying_matches::varying_matches(bool disable_varying_packing,
+ bool disable_xfb_packing,
bool xfb_enabled,
bool enhanced_layouts_enabled,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage)
: disable_varying_packing(disable_varying_packing),
+ disable_xfb_packing(disable_xfb_packing),
xfb_enabled(xfb_enabled),
enhanced_layouts_enabled(enhanced_layouts_enabled),
producer_stage(producer_stage),
producer_var->type->contains_double());
if (!disable_varying_packing &&
+ (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
(needs_flat_qualifier ||
(consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
/* Since this varying is not being consumed by the fragment shader, its
this->matches[this->num_matches].packing_order
= this->compute_packing_order(var);
if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
+ (this->disable_xfb_packing && var->data.is_xfb) ||
var->data.must_be_shader_input) {
unsigned slots = type->count_attribute_slots(false);
this->matches[this->num_matches].num_components = slots * 4;
* When packing is disabled the sort orders varyings used by transform
* feedback first, but also depends on *undefined behaviour* of qsort to
* reverse the order of the varyings. See: xfb_comparator().
+ *
+ * If packing is only disabled for xfb varyings (mutually exclusive with
+ * disable_varying_packing), we then group varyings depending on if they
+ * are captured for transform feedback. The same *undefined behaviour* is
+ * taken advantage of.
*/
- if (!this->disable_varying_packing) {
- /* Sort varying matches into an order that makes them easy to pack. */
- qsort(this->matches, this->num_matches, sizeof(*this->matches),
- &varying_matches::match_comparator);
- } else {
+ if (this->disable_varying_packing) {
/* Only sort varyings that are only used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::xfb_comparator);
+ } else if (this->disable_xfb_packing) {
+ /* Only sort varyings that are NOT used by transform feedback. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::not_xfb_comparator);
+ } else {
+ /* Sort varying matches into an order that makes them easy to pack. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::match_comparator);
}
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
+ bool previous_var_xfb = false;
bool previous_var_xfb_only = false;
unsigned previous_packing_class = ~0u;
* class than the previous one, and we're not already on a slot
* boundary.
*
+ * Also advance if varying packing is disabled for transform feedback,
+ * and previous or current varying is used for transform feedback.
+ *
* Also advance to the next slot if packing is disabled. This makes sure
* we don't assign varyings the same locations which is possible
* because we still pack individual arrays, records and matrices even
* feedback.
*/
if (var->data.must_be_shader_input ||
+ (this->disable_xfb_packing &&
+ (previous_var_xfb || var->data.is_xfb)) ||
(this->disable_varying_packing &&
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
(previous_packing_class != this->matches[i].packing_class) ||
*location = ALIGN(*location, 4);
}
+ previous_var_xfb = var->data.is_xfb;
previous_var_xfb_only = var->data.is_xfb_only;
previous_packing_class = this->matches[i].packing_class;
const glsl_type *type =
get_varying_type(producer_var, producer_stage);
if (type->is_array() || type->is_matrix() || type->is_struct() ||
- type->is_double()) {
+ type->is_64bit()) {
unsigned comp_slots = type->component_slots() + offset;
unsigned slots = comp_slots / 4;
if (comp_slots % 4)
}
+/**
+ * Comparison function passed to qsort() to sort varyings NOT used by
+ * transform feedback when packing of xfb varyings is disabled.
+ *
+ * Only the first element (\c x_generic) is inspected: when it has a producer
+ * variable that is not flagged is_xfb, the comparison is delegated to
+ * match_comparator(); in every other case 0 is returned, i.e. the two
+ * elements are reported as equivalent. \c y_generic is only forwarded to
+ * match_comparator() and never examined here.
+ */
+int
+varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
+{
+ const match *x = (const match *) x_generic;
+
+ if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
+ return match_comparator(x_generic, y_generic);
+
+ /* FIXME: When the comparator returns 0 it means the elements being
+ * compared are equivalent. However the qsort documentation says:
+ *
+ * "The order of equivalent elements is undefined."
+ *
+ * In practice the sort ends up reversing the order of the varyings, which
+ * means locations are also assigned in this reversed order, and that
+ * happens to be what we want. This is also what's happening in
+ * varying_matches::match_comparator().
+ *
+ * NOTE(review): the reference above may have been meant to point at
+ * xfb_comparator() rather than match_comparator() -- confirm.
+ */
+ return 0;
+}
+
+
/**
* Is the given variable a varying variable to be counted against the
* limit in ctx->Const.MaxVarying?
/* Transform feedback code assumes varying arrays are packed, so if the
* driver has disabled varying packing, make sure to at least enable
- * packing required by transform feedback.
+ * packing required by transform feedback. See below for exception.
*/
bool xfb_enabled =
ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
+ /* Some drivers actually require packing to be explicitly disabled
+ * for varyings used by transform feedback.
+ */
+ bool disable_xfb_packing =
+ ctx->Const.DisableTransformFeedbackPacking;
+
/* Disable packing on outward facing interfaces for SSO because in ES we
* need to retain the unpacked varying information for draw time
* validation.
if (prog->SeparateShader && (producer == NULL || consumer == NULL))
disable_varying_packing = true;
- varying_matches matches(disable_varying_packing, xfb_enabled,
+ varying_matches matches(disable_varying_packing,
+ disable_xfb_packing,
+ xfb_enabled,
ctx->Extensions.ARB_enhanced_layouts,
producer ? producer->Stage : MESA_SHADER_NONE,
consumer ? consumer->Stage : MESA_SHADER_NONE);
+ void *hash_table_ctx = ralloc_context(NULL);
hash_table *tfeedback_candidates =
- _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
hash_table *consumer_inputs =
- _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
hash_table *consumer_interface_inputs =
- _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+ _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
NULL,
linker_error(prog, "output %s is assigned to stream=%d but "
"is linked to an input, which requires stream=0",
output_var->name, output_var->data.stream);
+ ralloc_free(hash_table_ctx);
return false;
}
}
= tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
if (matched_candidate == NULL) {
- _mesa_hash_table_destroy(tfeedback_candidates, NULL);
+ ralloc_free(hash_table_ctx);
return false;
}
+ /* There are two situations where a new output varying is needed:
+ *
+ * - If varying packing is disabled for xfb and the current declaration
+ * is not aligned within the top level varying (e.g. vec3_arr[1]).
+ *
+ * - If a builtin variable needs to be copied to a new variable
+ * before its content is modified by another lowering pass (e.g.
+ * \c gl_Position is transformed by \c nir_lower_viewport_transform).
+ */
+ const unsigned dmul =
+ matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
+ const bool lowered =
+ (disable_xfb_packing &&
+ !tfeedback_decls[i].is_aligned(dmul, matched_candidate->offset)) ||
+ (matched_candidate->toplevel_var->data.explicit_location &&
+ matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
+ (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
+ BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
+
+ if (lowered) {
+ ir_variable *new_var;
+ tfeedback_candidate *new_candidate = NULL;
+
+ new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
+ if (new_var == NULL) {
+ ralloc_free(hash_table_ctx);
+ return false;
+ }
+
+ /* Create new candidate and replace matched_candidate */
+ new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
+ new_candidate->toplevel_var = new_var;
+ new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
+ new_candidate->type = new_var->type;
+ new_candidate->offset = 0;
+ _mesa_hash_table_insert(tfeedback_candidates,
+ ralloc_strdup(mem_ctx, new_var->name),
+ new_candidate);
+
+ tfeedback_decls[i].set_lowered_candidate(new_candidate);
+ matched_candidate = new_candidate;
+ }
+
+ /* Mark as xfb varying */
+ matched_candidate->toplevel_var->data.is_xfb = 1;
+
/* Mark xfb varyings as always active */
matched_candidate->toplevel_var->data.always_active_io = 1;
consumer_inputs,
consumer_interface_inputs,
consumer_inputs_with_locations);
- if (input_var)
+ if (input_var) {
+ input_var->data.is_xfb = 1;
input_var->data.always_active_io = 1;
+ }
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
matched_candidate->toplevel_var->data.is_xfb_only = 1;
}
}
- _mesa_hash_table_destroy(consumer_inputs, NULL);
- _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
-
uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
const unsigned slots_used = matches.assign_locations(
prog, components, reserved_slots);
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (tfeedback_decls[i].is_varying()) {
if (!tfeedback_decls[i].assign_location(ctx, prog)) {
- _mesa_hash_table_destroy(tfeedback_candidates, NULL);
+ ralloc_free(hash_table_ctx);
return false;
}
}
}
- _mesa_hash_table_destroy(tfeedback_candidates, NULL);
+ ralloc_free(hash_table_ctx);
if (consumer && producer) {
foreach_in_list(ir_instruction, node, consumer->ir) {
if (producer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
0, producer, disable_varying_packing,
- xfb_enabled);
+ disable_xfb_packing, xfb_enabled);
}
if (consumer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
- consumer_vertices, consumer,
- disable_varying_packing, xfb_enabled);
+ consumer_vertices, consumer, disable_varying_packing,
+ disable_xfb_packing, xfb_enabled);
}
return true;
}
if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
- has_xfb_qualifiers))
+ has_xfb_qualifiers, mem_ctx))
return false;
return true;