mesa/glsl: add api_enabled flag to gl_transform_feedback_info
[mesa.git] / src / compiler / glsl / link_varyings.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file link_varyings.cpp
26 *
27 * Linker functions related specifically to linking varyings between shader
28 * stages.
29 */
30
31
32 #include "main/mtypes.h"
33 #include "glsl_symbol_table.h"
34 #include "glsl_parser_extras.h"
35 #include "ir_optimization.h"
36 #include "linker.h"
37 #include "link_varyings.h"
38 #include "main/macros.h"
39 #include "util/hash_table.h"
40 #include "program.h"
41
42
43 /**
44 * Get the varying type stripped of the outermost array if we're processing
45 * a stage whose varyings are arrays indexed by a vertex number (such as
46 * geometry shader inputs).
47 */
48 static const glsl_type *
49 get_varying_type(const ir_variable *var, gl_shader_stage stage)
50 {
51 const glsl_type *type = var->type;
52
53 if (!var->data.patch &&
54 ((var->data.mode == ir_var_shader_out &&
55 stage == MESA_SHADER_TESS_CTRL) ||
56 (var->data.mode == ir_var_shader_in &&
57 (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
58 stage == MESA_SHADER_GEOMETRY)))) {
59 assert(type->is_array());
60 type = type->fields.array;
61 }
62
63 return type;
64 }
65
66 static void
67 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
68 size_t name_length, unsigned *count,
69 const char *ifc_member_name,
70 const glsl_type *ifc_member_t, char ***varying_names)
71 {
72 if (t->is_interface()) {
73 size_t new_length = name_length;
74
75 assert(ifc_member_name && ifc_member_t);
76 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
77
78 create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
79 NULL, NULL, varying_names);
80 } else if (t->is_record()) {
81 for (unsigned i = 0; i < t->length; i++) {
82 const char *field = t->fields.structure[i].name;
83 size_t new_length = name_length;
84
85 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
86
87 create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
88 new_length, count, NULL, NULL,
89 varying_names);
90 }
91 } else if (t->without_array()->is_record() ||
92 t->without_array()->is_interface() ||
93 (t->is_array() && t->fields.array->is_array())) {
94 for (unsigned i = 0; i < t->length; i++) {
95 size_t new_length = name_length;
96
97 /* Append the subscript to the current variable name */
98 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
99
100 create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
101 count, ifc_member_name, ifc_member_t,
102 varying_names);
103 }
104 } else {
105 (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
106 }
107 }
108
109 static bool
110 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
111 struct gl_shader_program *prog,
112 unsigned *num_tfeedback_decls,
113 char ***varying_names)
114 {
115 bool has_xfb_qualifiers = false;
116
117 /* We still need to enable transform feedback mode even if xfb_stride is
118 * only applied to a global out. Also we don't bother to propagate
119 * xfb_stride to interface block members so this will catch that case also.
120 */
121 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
122 if (prog->TransformFeedback.BufferStride[j]) {
123 has_xfb_qualifiers = true;
124 break;
125 }
126 }
127
128 foreach_in_list(ir_instruction, node, sh->ir) {
129 ir_variable *var = node->as_variable();
130 if (!var || var->data.mode != ir_var_shader_out)
131 continue;
132
133 /* From the ARB_enhanced_layouts spec:
134 *
135 * "Any shader making any static use (after preprocessing) of any of
136 * these *xfb_* qualifiers will cause the shader to be in a
137 * transform feedback capturing mode and hence responsible for
138 * describing the transform feedback setup. This mode will capture
139 * any output selected by *xfb_offset*, directly or indirectly, to
140 * a transform feedback buffer."
141 */
142 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
143 has_xfb_qualifiers = true;
144 }
145
146 if (var->data.explicit_xfb_offset) {
147 *num_tfeedback_decls += var->type->varying_count();
148 has_xfb_qualifiers = true;
149 }
150 }
151
152 if (*num_tfeedback_decls == 0)
153 return has_xfb_qualifiers;
154
155 unsigned i = 0;
156 *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
157 foreach_in_list(ir_instruction, node, sh->ir) {
158 ir_variable *var = node->as_variable();
159 if (!var || var->data.mode != ir_var_shader_out)
160 continue;
161
162 if (var->data.explicit_xfb_offset) {
163 char *name;
164 const glsl_type *type, *member_type;
165
166 if (var->data.from_named_ifc_block) {
167 type = var->get_interface_type();
168 /* Find the member type before it was altered by lowering */
169 member_type =
170 type->fields.structure[type->field_index(var->name)].type;
171 name = ralloc_strdup(NULL, type->without_array()->name);
172 } else {
173 type = var->type;
174 member_type = NULL;
175 name = ralloc_strdup(NULL, var->name);
176 }
177 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
178 var->name, member_type, varying_names);
179 ralloc_free(name);
180 }
181 }
182
183 assert(i == *num_tfeedback_decls);
184 return has_xfb_qualifiers;
185 }
186
187 /**
188 * Validate the types and qualifiers of an output from one stage against the
189 * matching input to another stage.
190 */
191 static void
192 cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
193 const ir_variable *input,
194 const ir_variable *output,
195 gl_shader_stage consumer_stage,
196 gl_shader_stage producer_stage)
197 {
198 /* Check that the types match between stages.
199 */
200 const glsl_type *type_to_match = input->type;
201
202 /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
203 const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
204 consumer_stage != MESA_SHADER_FRAGMENT) ||
205 consumer_stage == MESA_SHADER_GEOMETRY;
206 if (extra_array_level) {
207 assert(type_to_match->is_array());
208 type_to_match = type_to_match->fields.array;
209 }
210
211 if (type_to_match != output->type) {
212 /* There is a bit of a special case for gl_TexCoord. This
213 * built-in is unsized by default. Applications that variable
214 * access it must redeclare it with a size. There is some
215 * language in the GLSL spec that implies the fragment shader
216 * and vertex shader do not have to agree on this size. Other
217 * driver behave this way, and one or two applications seem to
218 * rely on it.
219 *
220 * Neither declaration needs to be modified here because the array
221 * sizes are fixed later when update_array_sizes is called.
222 *
223 * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
224 *
225 * "Unlike user-defined varying variables, the built-in
226 * varying variables don't have a strict one-to-one
227 * correspondence between the vertex language and the
228 * fragment language."
229 */
230 if (!output->type->is_array() || !is_gl_identifier(output->name)) {
231 linker_error(prog,
232 "%s shader output `%s' declared as type `%s', "
233 "but %s shader input declared as type `%s'\n",
234 _mesa_shader_stage_to_string(producer_stage),
235 output->name,
236 output->type->name,
237 _mesa_shader_stage_to_string(consumer_stage),
238 input->type->name);
239 return;
240 }
241 }
242
243 /* Check that all of the qualifiers match between stages.
244 */
245
246 /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
247 * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
248 * conformance test suite does not verify that the qualifiers must match.
249 * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
250 * OpenGLES 3.0 drivers, so we relax the checking in all cases.
251 */
252 if (false /* always skip the centroid check */ &&
253 prog->data->Version < (prog->IsES ? 310 : 430) &&
254 input->data.centroid != output->data.centroid) {
255 linker_error(prog,
256 "%s shader output `%s' %s centroid qualifier, "
257 "but %s shader input %s centroid qualifier\n",
258 _mesa_shader_stage_to_string(producer_stage),
259 output->name,
260 (output->data.centroid) ? "has" : "lacks",
261 _mesa_shader_stage_to_string(consumer_stage),
262 (input->data.centroid) ? "has" : "lacks");
263 return;
264 }
265
266 if (input->data.sample != output->data.sample) {
267 linker_error(prog,
268 "%s shader output `%s' %s sample qualifier, "
269 "but %s shader input %s sample qualifier\n",
270 _mesa_shader_stage_to_string(producer_stage),
271 output->name,
272 (output->data.sample) ? "has" : "lacks",
273 _mesa_shader_stage_to_string(consumer_stage),
274 (input->data.sample) ? "has" : "lacks");
275 return;
276 }
277
278 if (input->data.patch != output->data.patch) {
279 linker_error(prog,
280 "%s shader output `%s' %s patch qualifier, "
281 "but %s shader input %s patch qualifier\n",
282 _mesa_shader_stage_to_string(producer_stage),
283 output->name,
284 (output->data.patch) ? "has" : "lacks",
285 _mesa_shader_stage_to_string(consumer_stage),
286 (input->data.patch) ? "has" : "lacks");
287 return;
288 }
289
290 /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
291 *
292 * "As only outputs need be declared with invariant, an output from
293 * one shader stage will still match an input of a subsequent stage
294 * without the input being declared as invariant."
295 *
296 * while GLSL 4.20 says:
297 *
298 * "For variables leaving one shader and coming into another shader,
299 * the invariant keyword has to be used in both shaders, or a link
300 * error will result."
301 *
302 * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
303 *
304 * "The invariance of varyings that are declared in both the vertex
305 * and fragment shaders must match."
306 */
307 if (input->data.invariant != output->data.invariant &&
308 prog->data->Version < (prog->IsES ? 300 : 430)) {
309 linker_error(prog,
310 "%s shader output `%s' %s invariant qualifier, "
311 "but %s shader input %s invariant qualifier\n",
312 _mesa_shader_stage_to_string(producer_stage),
313 output->name,
314 (output->data.invariant) ? "has" : "lacks",
315 _mesa_shader_stage_to_string(consumer_stage),
316 (input->data.invariant) ? "has" : "lacks");
317 return;
318 }
319
320 /* GLSL >= 4.40 removes text requiring interpolation qualifiers
321 * to match cross stage, they must only match within the same stage.
322 *
323 * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
324 *
325 * "It is a link-time error if, within the same stage, the interpolation
326 * qualifiers of variables of the same name do not match.
327 *
328 * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
329 *
330 * "When no interpolation qualifier is present, smooth interpolation
331 * is used."
332 *
333 * So we match variables where one is smooth and the other has no explicit
334 * qualifier.
335 */
336 unsigned input_interpolation = input->data.interpolation;
337 unsigned output_interpolation = output->data.interpolation;
338 if (prog->IsES) {
339 if (input_interpolation == INTERP_MODE_NONE)
340 input_interpolation = INTERP_MODE_SMOOTH;
341 if (output_interpolation == INTERP_MODE_NONE)
342 output_interpolation = INTERP_MODE_SMOOTH;
343 }
344 if (input_interpolation != output_interpolation &&
345 prog->data->Version < 440) {
346 linker_error(prog,
347 "%s shader output `%s' specifies %s "
348 "interpolation qualifier, "
349 "but %s shader input specifies %s "
350 "interpolation qualifier\n",
351 _mesa_shader_stage_to_string(producer_stage),
352 output->name,
353 interpolation_string(output->data.interpolation),
354 _mesa_shader_stage_to_string(consumer_stage),
355 interpolation_string(input->data.interpolation));
356 return;
357 }
358 }
359
360 /**
361 * Validate front and back color outputs against single color input
362 */
363 static void
364 cross_validate_front_and_back_color(struct gl_shader_program *prog,
365 const ir_variable *input,
366 const ir_variable *front_color,
367 const ir_variable *back_color,
368 gl_shader_stage consumer_stage,
369 gl_shader_stage producer_stage)
370 {
371 if (front_color != NULL && front_color->data.assigned)
372 cross_validate_types_and_qualifiers(prog, input, front_color,
373 consumer_stage, producer_stage);
374
375 if (back_color != NULL && back_color->data.assigned)
376 cross_validate_types_and_qualifiers(prog, input, back_color,
377 consumer_stage, producer_stage);
378 }
379
380 static unsigned
381 compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
382 {
383 unsigned location_start = VARYING_SLOT_VAR0;
384
385 switch (stage) {
386 case MESA_SHADER_VERTEX:
387 if (var->data.mode == ir_var_shader_in)
388 location_start = VERT_ATTRIB_GENERIC0;
389 break;
390 case MESA_SHADER_TESS_CTRL:
391 case MESA_SHADER_TESS_EVAL:
392 if (var->data.patch)
393 location_start = VARYING_SLOT_PATCH0;
394 break;
395 case MESA_SHADER_FRAGMENT:
396 if (var->data.mode == ir_var_shader_out)
397 location_start = FRAG_RESULT_DATA0;
398 break;
399 default:
400 break;
401 }
402
403 return var->data.location - location_start;
404 }
405
/**
 * Bookkeeping for one component of one explicit varying location.
 *
 * The validators in this file keep a [location][component] table of these,
 * zero-initialized, and fill an entry in when a variable claims that
 * component.
 */
struct explicit_location_info {
   /* Variable recorded for this component; NULL while the component is free. */
   ir_variable *var;
   /* Result of get_numerical_type() for the recorded variable's type. */
   unsigned numerical_type;
   /* Interpolation qualifier recorded with the variable. */
   unsigned interpolation;
   /* Auxiliary storage qualifiers recorded with the variable. */
   bool centroid;
   bool sample;
   bool patch;
};
414
415 static inline unsigned
416 get_numerical_type(const glsl_type *type)
417 {
418 /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
419 * (Location aliasing):
420 *
421 * "Further, when location aliasing, the aliases sharing the location
422 * must have the same underlying numerical type (floating-point or
423 * integer)
424 */
425 if (type->is_float() || type->is_double())
426 return GLSL_TYPE_FLOAT;
427 return GLSL_TYPE_INT;
428 }
429
430 static bool
431 check_location_aliasing(struct explicit_location_info explicit_locations[][4],
432 ir_variable *var,
433 unsigned location,
434 unsigned component,
435 unsigned location_limit,
436 const glsl_type *type,
437 unsigned interpolation,
438 bool centroid,
439 bool sample,
440 bool patch,
441 gl_shader_program *prog,
442 gl_shader_stage stage)
443 {
444 unsigned last_comp;
445 if (type->without_array()->is_record()) {
446 /* The component qualifier can't be used on structs so just treat
447 * all component slots as used.
448 */
449 last_comp = 4;
450 } else {
451 unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
452 last_comp = component + type->without_array()->vector_elements * dmul;
453 }
454
455 while (location < location_limit) {
456 unsigned comp = 0;
457 while (comp < 4) {
458 struct explicit_location_info *info =
459 &explicit_locations[location][comp];
460
461 if (info->var) {
462 /* Component aliasing is not alloed */
463 if (comp >= component && comp < last_comp) {
464 linker_error(prog,
465 "%s shader has multiple outputs explicitly "
466 "assigned to location %d and component %d\n",
467 _mesa_shader_stage_to_string(stage),
468 location, comp);
469 return false;
470 } else {
471 /* For all other used components we need to have matching
472 * types, interpolation and auxiliary storage
473 */
474 if (info->numerical_type !=
475 get_numerical_type(type->without_array())) {
476 linker_error(prog,
477 "Varyings sharing the same location must "
478 "have the same underlying numerical type. "
479 "Location %u component %u\n",
480 location, comp);
481 return false;
482 }
483
484 if (info->interpolation != interpolation) {
485 linker_error(prog,
486 "%s shader has multiple outputs at explicit "
487 "location %u with different interpolation "
488 "settings\n",
489 _mesa_shader_stage_to_string(stage), location);
490 return false;
491 }
492
493 if (info->centroid != centroid ||
494 info->sample != sample ||
495 info->patch != patch) {
496 linker_error(prog,
497 "%s shader has multiple outputs at explicit "
498 "location %u with different aux storage\n",
499 _mesa_shader_stage_to_string(stage), location);
500 return false;
501 }
502 }
503 } else if (comp >= component && comp < last_comp) {
504 info->var = var;
505 info->numerical_type = get_numerical_type(type->without_array());
506 info->interpolation = interpolation;
507 info->centroid = centroid;
508 info->sample = sample;
509 info->patch = patch;
510 }
511
512 comp++;
513
514 /* We need to do some special handling for doubles as dvec3 and
515 * dvec4 consume two consecutive locations. We don't need to
516 * worry about components beginning at anything other than 0 as
517 * the spec does not allow this for dvec3 and dvec4.
518 */
519 if (comp == 4 && last_comp > 4) {
520 last_comp = last_comp - 4;
521 /* Bump location index and reset the component index */
522 location++;
523 comp = 0;
524 component = 0;
525 }
526 }
527
528 location++;
529 }
530
531 return true;
532 }
533
534 static bool
535 validate_explicit_variable_location(struct gl_context *ctx,
536 struct explicit_location_info explicit_locations[][4],
537 ir_variable *var,
538 gl_shader_program *prog,
539 gl_linked_shader *sh)
540 {
541 const glsl_type *type = get_varying_type(var, sh->Stage);
542 unsigned num_elements = type->count_attribute_slots(false);
543 unsigned idx = compute_variable_location_slot(var, sh->Stage);
544 unsigned slot_limit = idx + num_elements;
545
546 /* Vertex shader inputs and fragment shader outputs are validated in
547 * assign_attribute_or_color_locations() so we should not attempt to
548 * validate them again here.
549 */
550 unsigned slot_max;
551 if (var->data.mode == ir_var_shader_out) {
552 assert(sh->Stage != MESA_SHADER_FRAGMENT);
553 slot_max =
554 ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
555 } else {
556 assert(var->data.mode == ir_var_shader_in);
557 assert(sh->Stage != MESA_SHADER_VERTEX);
558 slot_max =
559 ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
560 }
561
562 if (slot_limit > slot_max) {
563 linker_error(prog,
564 "Invalid location %u in %s shader\n",
565 idx, _mesa_shader_stage_to_string(sh->Stage));
566 return false;
567 }
568
569 if (type->without_array()->is_interface()) {
570 for (unsigned i = 0; i < type->without_array()->length; i++) {
571 glsl_struct_field *field = &type->fields.structure[i];
572 unsigned field_location = field->location -
573 (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
574 if (!check_location_aliasing(explicit_locations, var,
575 field_location,
576 0, field_location + 1,
577 field->type,
578 field->interpolation,
579 field->centroid,
580 field->sample,
581 field->patch,
582 prog, sh->Stage)) {
583 return false;
584 }
585 }
586 } else if (!check_location_aliasing(explicit_locations, var,
587 idx, var->data.location_frac,
588 slot_limit, type,
589 var->data.interpolation,
590 var->data.centroid,
591 var->data.sample,
592 var->data.patch,
593 prog, sh->Stage)) {
594 return false;
595 }
596
597 return true;
598 }
599
600 /**
601 * Validate explicit locations for the inputs to the first stage and the
602 * outputs of the last stage in an SSO program (everything in between is
603 * validated in cross_validate_outputs_to_inputs).
604 */
605 void
606 validate_sso_explicit_locations(struct gl_context *ctx,
607 struct gl_shader_program *prog,
608 gl_shader_stage first_stage,
609 gl_shader_stage last_stage)
610 {
611 assert(prog->SeparateShader);
612
613 /* VS inputs and FS outputs are validated in
614 * assign_attribute_or_color_locations()
615 */
616 bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
617 bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
618 if (!validate_first_stage && !validate_last_stage)
619 return;
620
621 struct explicit_location_info explicit_locations[MAX_VARYING][4];
622
623 gl_shader_stage stages[2] = { first_stage, last_stage };
624 bool validate_stage[2] = { validate_first_stage, validate_last_stage };
625 ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
626
627 for (unsigned i = 0; i < 2; i++) {
628 if (!validate_stage[i])
629 continue;
630
631 gl_shader_stage stage = stages[i];
632
633 gl_linked_shader *sh = prog->_LinkedShaders[stage];
634 assert(sh);
635
636 memset(explicit_locations, 0, sizeof(explicit_locations));
637
638 foreach_in_list(ir_instruction, node, sh->ir) {
639 ir_variable *const var = node->as_variable();
640
641 if (var == NULL ||
642 !var->data.explicit_location ||
643 var->data.location < VARYING_SLOT_VAR0 ||
644 var->data.mode != var_direction[i])
645 continue;
646
647 if (!validate_explicit_variable_location(
648 ctx, explicit_locations, var, prog, sh)) {
649 return;
650 }
651 }
652 }
653 }
654
655 /**
656 * Validate that outputs from one stage match inputs of another
657 */
658 void
659 cross_validate_outputs_to_inputs(struct gl_context *ctx,
660 struct gl_shader_program *prog,
661 gl_linked_shader *producer,
662 gl_linked_shader *consumer)
663 {
664 glsl_symbol_table parameters;
665 struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
666
667 /* Find all shader outputs in the "producer" stage.
668 */
669 foreach_in_list(ir_instruction, node, producer->ir) {
670 ir_variable *const var = node->as_variable();
671
672 if (var == NULL || var->data.mode != ir_var_shader_out)
673 continue;
674
675 if (!var->data.explicit_location
676 || var->data.location < VARYING_SLOT_VAR0)
677 parameters.add_variable(var);
678 else {
679 /* User-defined varyings with explicit locations are handled
680 * differently because they do not need to have matching names.
681 */
682 if (!validate_explicit_variable_location(ctx,
683 explicit_locations,
684 var, prog, producer)) {
685 return;
686 }
687 }
688 }
689
690
691 /* Find all shader inputs in the "consumer" stage. Any variables that have
692 * matching outputs already in the symbol table must have the same type and
693 * qualifiers.
694 *
695 * Exception: if the consumer is the geometry shader, then the inputs
696 * should be arrays and the type of the array element should match the type
697 * of the corresponding producer output.
698 */
699 foreach_in_list(ir_instruction, node, consumer->ir) {
700 ir_variable *const input = node->as_variable();
701
702 if (input == NULL || input->data.mode != ir_var_shader_in)
703 continue;
704
705 if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
706 const ir_variable *const front_color =
707 parameters.get_variable("gl_FrontColor");
708
709 const ir_variable *const back_color =
710 parameters.get_variable("gl_BackColor");
711
712 cross_validate_front_and_back_color(prog, input,
713 front_color, back_color,
714 consumer->Stage, producer->Stage);
715 } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
716 const ir_variable *const front_color =
717 parameters.get_variable("gl_FrontSecondaryColor");
718
719 const ir_variable *const back_color =
720 parameters.get_variable("gl_BackSecondaryColor");
721
722 cross_validate_front_and_back_color(prog, input,
723 front_color, back_color,
724 consumer->Stage, producer->Stage);
725 } else {
726 /* The rules for connecting inputs and outputs change in the presence
727 * of explicit locations. In this case, we no longer care about the
728 * names of the variables. Instead, we care only about the
729 * explicitly assigned location.
730 */
731 ir_variable *output = NULL;
732 if (input->data.explicit_location
733 && input->data.location >= VARYING_SLOT_VAR0) {
734
735 const glsl_type *type = get_varying_type(input, consumer->Stage);
736 unsigned num_elements = type->count_attribute_slots(false);
737 unsigned idx =
738 compute_variable_location_slot(input, consumer->Stage);
739 unsigned slot_limit = idx + num_elements;
740
741 while (idx < slot_limit) {
742 if (idx >= MAX_VARYING) {
743 linker_error(prog,
744 "Invalid location %u in %s shader\n", idx,
745 _mesa_shader_stage_to_string(consumer->Stage));
746 return;
747 }
748
749 output = explicit_locations[idx][input->data.location_frac].var;
750
751 if (output == NULL ||
752 input->data.location != output->data.location) {
753 linker_error(prog,
754 "%s shader input `%s' with explicit location "
755 "has no matching output\n",
756 _mesa_shader_stage_to_string(consumer->Stage),
757 input->name);
758 break;
759 }
760 idx++;
761 }
762 } else {
763 output = parameters.get_variable(input->name);
764 }
765
766 if (output != NULL) {
767 /* Interface blocks have their own validation elsewhere so don't
768 * try validating them here.
769 */
770 if (!(input->get_interface_type() &&
771 output->get_interface_type()))
772 cross_validate_types_and_qualifiers(prog, input, output,
773 consumer->Stage,
774 producer->Stage);
775 } else {
776 /* Check for input vars with unmatched output vars in prev stage
777 * taking into account that interface blocks could have a matching
778 * output but with different name, so we ignore them.
779 */
780 assert(!input->data.assigned);
781 if (input->data.used && !input->get_interface_type() &&
782 !input->data.explicit_location && !prog->SeparateShader)
783 linker_error(prog,
784 "%s shader input `%s' "
785 "has no matching output in the previous stage\n",
786 _mesa_shader_stage_to_string(consumer->Stage),
787 input->name);
788 }
789 }
790 }
791 }
792
793 /**
794 * Demote shader inputs and outputs that are not used in other stages, and
795 * remove them via dead code elimination.
796 */
797 static void
798 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
799 gl_linked_shader *sh,
800 enum ir_variable_mode mode)
801 {
802 if (is_separate_shader_object)
803 return;
804
805 foreach_in_list(ir_instruction, node, sh->ir) {
806 ir_variable *const var = node->as_variable();
807
808 if (var == NULL || var->data.mode != int(mode))
809 continue;
810
811 /* A shader 'in' or 'out' variable is only really an input or output if
812 * its value is used by other shader stages. This will cause the
813 * variable to have a location assigned.
814 */
815 if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
816 assert(var->data.mode != ir_var_temporary);
817
818 /* Assign zeros to demoted inputs to allow more optimizations. */
819 if (var->data.mode == ir_var_shader_in && !var->constant_value)
820 var->constant_value = ir_constant::zero(var, var->type);
821
822 var->data.mode = ir_var_auto;
823 }
824 }
825
826 /* Eliminate code that is now dead due to unused inputs/outputs being
827 * demoted.
828 */
829 while (do_dead_code(sh->ir, false))
830 ;
831
832 }
833
834 /**
835 * Initialize this object based on a string that was passed to
836 * glTransformFeedbackVaryings.
837 *
838 * If the input is mal-formed, this call still succeeds, but it sets
839 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
840 * will fail to find any matching variable.
841 */
842 void
843 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
844 const char *input)
845 {
846 /* We don't have to be pedantic about what is a valid GLSL variable name,
847 * because any variable with an invalid name can't exist in the IR anyway.
848 */
849
850 this->location = -1;
851 this->orig_name = input;
852 this->lowered_builtin_array_variable = none;
853 this->skip_components = 0;
854 this->next_buffer_separator = false;
855 this->matched_candidate = NULL;
856 this->stream_id = 0;
857 this->buffer = 0;
858 this->offset = 0;
859
860 if (ctx->Extensions.ARB_transform_feedback3) {
861 /* Parse gl_NextBuffer. */
862 if (strcmp(input, "gl_NextBuffer") == 0) {
863 this->next_buffer_separator = true;
864 return;
865 }
866
867 /* Parse gl_SkipComponents. */
868 if (strcmp(input, "gl_SkipComponents1") == 0)
869 this->skip_components = 1;
870 else if (strcmp(input, "gl_SkipComponents2") == 0)
871 this->skip_components = 2;
872 else if (strcmp(input, "gl_SkipComponents3") == 0)
873 this->skip_components = 3;
874 else if (strcmp(input, "gl_SkipComponents4") == 0)
875 this->skip_components = 4;
876
877 if (this->skip_components)
878 return;
879 }
880
881 /* Parse a declaration. */
882 const char *base_name_end;
883 long subscript = parse_program_resource_name(input, &base_name_end);
884 this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
885 if (this->var_name == NULL) {
886 _mesa_error_no_memory(__func__);
887 return;
888 }
889
890 if (subscript >= 0) {
891 this->array_subscript = subscript;
892 this->is_subscripted = true;
893 } else {
894 this->is_subscripted = false;
895 }
896
897 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
898 * class must behave specially to account for the fact that gl_ClipDistance
899 * is converted from a float[8] to a vec4[2].
900 */
901 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
902 strcmp(this->var_name, "gl_ClipDistance") == 0) {
903 this->lowered_builtin_array_variable = clip_distance;
904 }
905 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
906 strcmp(this->var_name, "gl_CullDistance") == 0) {
907 this->lowered_builtin_array_variable = cull_distance;
908 }
909
910 if (ctx->Const.LowerTessLevel &&
911 (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
912 this->lowered_builtin_array_variable = tess_level_outer;
913 if (ctx->Const.LowerTessLevel &&
914 (strcmp(this->var_name, "gl_TessLevelInner") == 0))
915 this->lowered_builtin_array_variable = tess_level_inner;
916 }
917
918
919 /**
920 * Determine whether two tfeedback_decl objects refer to the same variable and
921 * array index (if applicable).
922 */
923 bool
924 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
925 {
926 assert(x.is_varying() && y.is_varying());
927
928 if (strcmp(x.var_name, y.var_name) != 0)
929 return false;
930 if (x.is_subscripted != y.is_subscripted)
931 return false;
932 if (x.is_subscripted && x.array_subscript != y.array_subscript)
933 return false;
934 return true;
935 }
936
937
938 /**
939 * Assign a location and stream ID for this tfeedback_decl object based on the
940 * transform feedback candidate found by find_candidate.
941 *
942 * If an error occurs, the error is reported through linker_error() and false
943 * is returned.
944 */
945 bool
946 tfeedback_decl::assign_location(struct gl_context *ctx,
947 struct gl_shader_program *prog)
948 {
949 assert(this->is_varying());
950
951 unsigned fine_location
952 = this->matched_candidate->toplevel_var->data.location * 4
953 + this->matched_candidate->toplevel_var->data.location_frac
954 + this->matched_candidate->offset;
955 const unsigned dmul =
956 this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
957
958 if (this->matched_candidate->type->is_array()) {
959 /* Array variable */
960 const unsigned matrix_cols =
961 this->matched_candidate->type->fields.array->matrix_columns;
962 const unsigned vector_elements =
963 this->matched_candidate->type->fields.array->vector_elements;
964 unsigned actual_array_size;
965 switch (this->lowered_builtin_array_variable) {
966 case clip_distance:
967 actual_array_size = prog->last_vert_prog ?
968 prog->last_vert_prog->info.clip_distance_array_size : 0;
969 break;
970 case cull_distance:
971 actual_array_size = prog->last_vert_prog ?
972 prog->last_vert_prog->info.cull_distance_array_size : 0;
973 break;
974 case tess_level_outer:
975 actual_array_size = 4;
976 break;
977 case tess_level_inner:
978 actual_array_size = 2;
979 break;
980 case none:
981 default:
982 actual_array_size = this->matched_candidate->type->array_size();
983 break;
984 }
985
986 if (this->is_subscripted) {
987 /* Check array bounds. */
988 if (this->array_subscript >= actual_array_size) {
989 linker_error(prog, "Transform feedback varying %s has index "
990 "%i, but the array size is %u.",
991 this->orig_name, this->array_subscript,
992 actual_array_size);
993 return false;
994 }
995 unsigned array_elem_size = this->lowered_builtin_array_variable ?
996 1 : vector_elements * matrix_cols * dmul;
997 fine_location += array_elem_size * this->array_subscript;
998 this->size = 1;
999 } else {
1000 this->size = actual_array_size;
1001 }
1002 this->vector_elements = vector_elements;
1003 this->matrix_columns = matrix_cols;
1004 if (this->lowered_builtin_array_variable)
1005 this->type = GL_FLOAT;
1006 else
1007 this->type = this->matched_candidate->type->fields.array->gl_type;
1008 } else {
1009 /* Regular variable (scalar, vector, or matrix) */
1010 if (this->is_subscripted) {
1011 linker_error(prog, "Transform feedback varying %s requested, "
1012 "but %s is not an array.",
1013 this->orig_name, this->var_name);
1014 return false;
1015 }
1016 this->size = 1;
1017 this->vector_elements = this->matched_candidate->type->vector_elements;
1018 this->matrix_columns = this->matched_candidate->type->matrix_columns;
1019 this->type = this->matched_candidate->type->gl_type;
1020 }
1021 this->location = fine_location / 4;
1022 this->location_frac = fine_location % 4;
1023
1024 /* From GL_EXT_transform_feedback:
1025 * A program will fail to link if:
1026 *
1027 * * the total number of components to capture in any varying
1028 * variable in <varyings> is greater than the constant
1029 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1030 * buffer mode is SEPARATE_ATTRIBS_EXT;
1031 */
1032 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1033 this->num_components() >
1034 ctx->Const.MaxTransformFeedbackSeparateComponents) {
1035 linker_error(prog, "Transform feedback varying %s exceeds "
1036 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1037 this->orig_name);
1038 return false;
1039 }
1040
1041 /* Only transform feedback varyings can be assigned to non-zero streams,
1042 * so assign the stream id here.
1043 */
1044 this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1045
1046 unsigned array_offset = this->array_subscript * 4 * dmul;
1047 unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
1048 this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1049 this->offset = this->matched_candidate->toplevel_var->data.offset +
1050 array_offset + struct_offset;
1051
1052 return true;
1053 }
1054
1055
1056 unsigned
1057 tfeedback_decl::get_num_outputs() const
1058 {
1059 if (!this->is_varying()) {
1060 return 0;
1061 }
1062 return (this->num_components() + this->location_frac + 3)/4;
1063 }
1064
1065
1066 /**
1067 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1068 *
1069 * If an error occurs, the error is reported through linker_error() and false
1070 * is returned.
1071 */
1072 bool
1073 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1074 struct gl_transform_feedback_info *info,
1075 unsigned buffer, unsigned buffer_index,
1076 const unsigned max_outputs, bool *explicit_stride,
1077 bool has_xfb_qualifiers) const
1078 {
1079 unsigned xfb_offset = 0;
1080 unsigned size = this->size;
1081 /* Handle gl_SkipComponents. */
1082 if (this->skip_components) {
1083 info->Buffers[buffer].Stride += this->skip_components;
1084 size = this->skip_components;
1085 goto store_varying;
1086 }
1087
1088 if (this->next_buffer_separator) {
1089 size = 0;
1090 goto store_varying;
1091 }
1092
1093 if (has_xfb_qualifiers) {
1094 xfb_offset = this->offset / 4;
1095 } else {
1096 xfb_offset = info->Buffers[buffer].Stride;
1097 }
1098 info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1099
1100 {
1101 unsigned location = this->location;
1102 unsigned location_frac = this->location_frac;
1103 unsigned num_components = this->num_components();
1104 while (num_components > 0) {
1105 unsigned output_size = MIN2(num_components, 4 - location_frac);
1106 assert((info->NumOutputs == 0 && max_outputs == 0) ||
1107 info->NumOutputs < max_outputs);
1108
1109 /* From the ARB_enhanced_layouts spec:
1110 *
1111 * "If such a block member or variable is not written during a shader
1112 * invocation, the buffer contents at the assigned offset will be
1113 * undefined. Even if there are no static writes to a variable or
1114 * member that is assigned a transform feedback offset, the space is
1115 * still allocated in the buffer and still affects the stride."
1116 */
1117 if (this->is_varying_written()) {
1118 info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1119 info->Outputs[info->NumOutputs].OutputRegister = location;
1120 info->Outputs[info->NumOutputs].NumComponents = output_size;
1121 info->Outputs[info->NumOutputs].StreamId = stream_id;
1122 info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1123 info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1124 ++info->NumOutputs;
1125 }
1126 info->Buffers[buffer].Stream = this->stream_id;
1127 xfb_offset += output_size;
1128
1129 num_components -= output_size;
1130 location++;
1131 location_frac = 0;
1132 }
1133 }
1134
1135 if (explicit_stride && explicit_stride[buffer]) {
1136 if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1137 linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1138 "multiple of 8 as its applied to a type that is or "
1139 "contains a double.",
1140 info->Buffers[buffer].Stride * 4);
1141 return false;
1142 }
1143
1144 if ((this->offset / 4) / info->Buffers[buffer].Stride !=
1145 (xfb_offset - 1) / info->Buffers[buffer].Stride) {
1146 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1147 "buffer (%d)", xfb_offset * 4,
1148 info->Buffers[buffer].Stride * 4, buffer);
1149 return false;
1150 }
1151 } else {
1152 info->Buffers[buffer].Stride = xfb_offset;
1153 }
1154
1155 /* From GL_EXT_transform_feedback:
1156 * A program will fail to link if:
1157 *
1158 * * the total number of components to capture is greater than
1159 * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1160 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
1161 *
1162 * From GL_ARB_enhanced_layouts:
1163 *
1164 * "The resulting stride (implicit or explicit) must be less than or
1165 * equal to the implementation-dependent constant
1166 * gl_MaxTransformFeedbackInterleavedComponents."
1167 */
1168 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1169 has_xfb_qualifiers) &&
1170 info->Buffers[buffer].Stride >
1171 ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1172 linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1173 "limit has been exceeded.");
1174 return false;
1175 }
1176
1177 store_varying:
1178 info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1179 this->orig_name);
1180 info->Varyings[info->NumVarying].Type = this->type;
1181 info->Varyings[info->NumVarying].Size = size;
1182 info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1183 info->NumVarying++;
1184 info->Buffers[buffer].NumVaryings++;
1185
1186 return true;
1187 }
1188
1189
1190 const tfeedback_candidate *
1191 tfeedback_decl::find_candidate(gl_shader_program *prog,
1192 hash_table *tfeedback_candidates)
1193 {
1194 const char *name = this->var_name;
1195 switch (this->lowered_builtin_array_variable) {
1196 case none:
1197 name = this->var_name;
1198 break;
1199 case clip_distance:
1200 name = "gl_ClipDistanceMESA";
1201 break;
1202 case cull_distance:
1203 name = "gl_CullDistanceMESA";
1204 break;
1205 case tess_level_outer:
1206 name = "gl_TessLevelOuterMESA";
1207 break;
1208 case tess_level_inner:
1209 name = "gl_TessLevelInnerMESA";
1210 break;
1211 }
1212 hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1213
1214 this->matched_candidate = entry ?
1215 (const tfeedback_candidate *) entry->data : NULL;
1216
1217 if (!this->matched_candidate) {
1218 /* From GL_EXT_transform_feedback:
1219 * A program will fail to link if:
1220 *
1221 * * any variable name specified in the <varyings> array is not
1222 * declared as an output in the geometry shader (if present) or
1223 * the vertex shader (if no geometry shader is present);
1224 */
1225 linker_error(prog, "Transform feedback varying %s undeclared.",
1226 this->orig_name);
1227 }
1228
1229 return this->matched_candidate;
1230 }
1231
1232
1233 /**
1234 * Parse all the transform feedback declarations that were passed to
1235 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1236 *
1237 * If an error occurs, the error is reported through linker_error() and false
1238 * is returned.
1239 */
1240 static bool
1241 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1242 const void *mem_ctx, unsigned num_names,
1243 char **varying_names, tfeedback_decl *decls)
1244 {
1245 for (unsigned i = 0; i < num_names; ++i) {
1246 decls[i].init(ctx, mem_ctx, varying_names[i]);
1247
1248 if (!decls[i].is_varying())
1249 continue;
1250
1251 /* From GL_EXT_transform_feedback:
1252 * A program will fail to link if:
1253 *
1254 * * any two entries in the <varyings> array specify the same varying
1255 * variable;
1256 *
1257 * We interpret this to mean "any two entries in the <varyings> array
1258 * specify the same varying variable and array index", since transform
1259 * feedback of arrays would be useless otherwise.
1260 */
1261 for (unsigned j = 0; j < i; ++j) {
1262 if (!decls[j].is_varying())
1263 continue;
1264
1265 if (tfeedback_decl::is_same(decls[i], decls[j])) {
1266 linker_error(prog, "Transform feedback varying %s specified "
1267 "more than once.", varying_names[i]);
1268 return false;
1269 }
1270 }
1271 }
1272 return true;
1273 }
1274
1275
1276 static int
1277 cmp_xfb_offset(const void * x_generic, const void * y_generic)
1278 {
1279 tfeedback_decl *x = (tfeedback_decl *) x_generic;
1280 tfeedback_decl *y = (tfeedback_decl *) y_generic;
1281
1282 if (x->get_buffer() != y->get_buffer())
1283 return x->get_buffer() - y->get_buffer();
1284 return x->get_offset() - y->get_offset();
1285 }
1286
1287 /**
1288 * Store transform feedback location assignments into
1289 * prog->sh.LinkedTransformFeedback based on the data stored in
1290 * tfeedback_decls.
1291 *
1292 * If an error occurs, the error is reported through linker_error() and false
1293 * is returned.
1294 */
1295 static bool
1296 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1297 unsigned num_tfeedback_decls,
1298 tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
1299 {
1300 if (!prog->last_vert_prog)
1301 return true;
1302
1303 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1304 * tracking the number of buffers doesn't overflow.
1305 */
1306 assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1307
1308 bool separate_attribs_mode =
1309 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1310
1311 struct gl_program *xfb_prog = prog->last_vert_prog;
1312 xfb_prog->sh.LinkedTransformFeedback =
1313 rzalloc(xfb_prog, struct gl_transform_feedback_info);
1314
1315 /* The xfb_offset qualifier does not have to be used in increasing order
1316 * however some drivers expect to receive the list of transform feedback
1317 * declarations in order so sort it now for convenience.
1318 */
1319 if (has_xfb_qualifiers) {
1320 qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1321 cmp_xfb_offset);
1322 } else {
1323 xfb_prog->sh.LinkedTransformFeedback->api_enabled = true;
1324 }
1325
1326 xfb_prog->sh.LinkedTransformFeedback->Varyings =
1327 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1328 num_tfeedback_decls);
1329
1330 unsigned num_outputs = 0;
1331 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1332 if (tfeedback_decls[i].is_varying_written())
1333 num_outputs += tfeedback_decls[i].get_num_outputs();
1334 }
1335
1336 xfb_prog->sh.LinkedTransformFeedback->Outputs =
1337 rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1338 num_outputs);
1339
1340 unsigned num_buffers = 0;
1341 unsigned buffers = 0;
1342
1343 if (!has_xfb_qualifiers && separate_attribs_mode) {
1344 /* GL_SEPARATE_ATTRIBS */
1345 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1346 if (!tfeedback_decls[i].store(ctx, prog,
1347 xfb_prog->sh.LinkedTransformFeedback,
1348 num_buffers, num_buffers, num_outputs,
1349 NULL, has_xfb_qualifiers))
1350 return false;
1351
1352 buffers |= 1 << num_buffers;
1353 num_buffers++;
1354 }
1355 }
1356 else {
1357 /* GL_INVERLEAVED_ATTRIBS */
1358 int buffer_stream_id = -1;
1359 unsigned buffer =
1360 num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1361 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1362
1363 /* Apply any xfb_stride global qualifiers */
1364 if (has_xfb_qualifiers) {
1365 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1366 if (prog->TransformFeedback.BufferStride[j]) {
1367 buffers |= 1 << j;
1368 explicit_stride[j] = true;
1369 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1370 prog->TransformFeedback.BufferStride[j] / 4;
1371 }
1372 }
1373 }
1374
1375 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1376 if (has_xfb_qualifiers &&
1377 buffer != tfeedback_decls[i].get_buffer()) {
1378 /* we have moved to the next buffer so reset stream id */
1379 buffer_stream_id = -1;
1380 num_buffers++;
1381 }
1382
1383 if (tfeedback_decls[i].is_next_buffer_separator()) {
1384 if (!tfeedback_decls[i].store(ctx, prog,
1385 xfb_prog->sh.LinkedTransformFeedback,
1386 buffer, num_buffers, num_outputs,
1387 explicit_stride, has_xfb_qualifiers))
1388 return false;
1389 num_buffers++;
1390 buffer_stream_id = -1;
1391 continue;
1392 } else if (tfeedback_decls[i].is_varying()) {
1393 if (buffer_stream_id == -1) {
1394 /* First varying writing to this buffer: remember its stream */
1395 buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1396 } else if (buffer_stream_id !=
1397 (int) tfeedback_decls[i].get_stream_id()) {
1398 /* Varying writes to the same buffer from a different stream */
1399 linker_error(prog,
1400 "Transform feedback can't capture varyings belonging "
1401 "to different vertex streams in a single buffer. "
1402 "Varying %s writes to buffer from stream %u, other "
1403 "varyings in the same buffer write from stream %u.",
1404 tfeedback_decls[i].name(),
1405 tfeedback_decls[i].get_stream_id(),
1406 buffer_stream_id);
1407 return false;
1408 }
1409 }
1410
1411 if (has_xfb_qualifiers) {
1412 buffer = tfeedback_decls[i].get_buffer();
1413 } else {
1414 buffer = num_buffers;
1415 }
1416 buffers |= 1 << buffer;
1417
1418 if (!tfeedback_decls[i].store(ctx, prog,
1419 xfb_prog->sh.LinkedTransformFeedback,
1420 buffer, num_buffers, num_outputs,
1421 explicit_stride, has_xfb_qualifiers))
1422 return false;
1423 }
1424 }
1425
1426 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1427
1428 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1429 return true;
1430 }
1431
1432 namespace {
1433
1434 /**
1435 * Data structure recording the relationship between outputs of one shader
1436 * stage (the "producer") and inputs of another (the "consumer").
1437 */
1438 class varying_matches
1439 {
1440 public:
1441 varying_matches(bool disable_varying_packing, bool xfb_enabled,
1442 bool enhanced_layouts_enabled,
1443 gl_shader_stage producer_stage,
1444 gl_shader_stage consumer_stage);
1445 ~varying_matches();
1446 void record(ir_variable *producer_var, ir_variable *consumer_var);
1447 unsigned assign_locations(struct gl_shader_program *prog,
1448 uint8_t *components,
1449 uint64_t reserved_slots);
1450 void store_locations() const;
1451
1452 private:
1453 bool is_varying_packing_safe(const glsl_type *type,
1454 const ir_variable *var);
1455
1456 /**
1457 * If true, this driver disables varying packing, so all varyings need to
1458 * be aligned on slot boundaries, and take up a number of slots equal to
1459 * their number of matrix columns times their array size.
1460 *
1461 * Packing may also be disabled because our current packing method is not
1462 * safe in SSO or versions of OpenGL where interpolation qualifiers are not
1463 * guaranteed to match across stages.
1464 */
1465 const bool disable_varying_packing;
1466
1467 /**
1468 * If true, this driver has transform feedback enabled. The transform
1469 * feedback code requires at least some packing be done even when varying
1470 * packing is disabled, fortunately where transform feedback requires
1471 * packing it's safe to override the disabled setting. See
1472 * is_varying_packing_safe().
1473 */
1474 const bool xfb_enabled;
1475
1476 const bool enhanced_layouts_enabled;
1477
1478 /**
1479 * Enum representing the order in which varyings are packed within a
1480 * packing class.
1481 *
1482 * Currently we pack vec4's first, then vec2's, then scalar values, then
1483 * vec3's. This order ensures that the only vectors that are at risk of
1484 * having to be "double parked" (split between two adjacent varying slots)
1485 * are the vec3's.
1486 */
1487 enum packing_order_enum {
1488 PACKING_ORDER_VEC4,
1489 PACKING_ORDER_VEC2,
1490 PACKING_ORDER_SCALAR,
1491 PACKING_ORDER_VEC3,
1492 };
1493
1494 static unsigned compute_packing_class(const ir_variable *var);
1495 static packing_order_enum compute_packing_order(const ir_variable *var);
1496 static int match_comparator(const void *x_generic, const void *y_generic);
1497 static int xfb_comparator(const void *x_generic, const void *y_generic);
1498
1499 /**
1500 * Structure recording the relationship between a single producer output
1501 * and a single consumer input.
1502 */
1503 struct match {
1504 /**
1505 * Packing class for this varying, computed by compute_packing_class().
1506 */
1507 unsigned packing_class;
1508
1509 /**
1510 * Packing order for this varying, computed by compute_packing_order().
1511 */
1512 packing_order_enum packing_order;
1513 unsigned num_components;
1514
1515 /**
1516 * The output variable in the producer stage.
1517 */
1518 ir_variable *producer_var;
1519
1520 /**
1521 * The input variable in the consumer stage.
1522 */
1523 ir_variable *consumer_var;
1524
1525 /**
1526 * The location which has been assigned for this varying. This is
1527 * expressed in multiples of a float, with the first generic varying
1528 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
1529 * value 0.
1530 */
1531 unsigned generic_location;
1532 } *matches;
1533
1534 /**
1535 * The number of elements in the \c matches array that are currently in
1536 * use.
1537 */
1538 unsigned num_matches;
1539
1540 /**
1541 * The number of elements that were set aside for the \c matches array when
1542 * it was allocated.
1543 */
1544 unsigned matches_capacity;
1545
1546 gl_shader_stage producer_stage;
1547 gl_shader_stage consumer_stage;
1548 };
1549
1550 } /* anonymous namespace */
1551
1552 varying_matches::varying_matches(bool disable_varying_packing,
1553 bool xfb_enabled,
1554 bool enhanced_layouts_enabled,
1555 gl_shader_stage producer_stage,
1556 gl_shader_stage consumer_stage)
1557 : disable_varying_packing(disable_varying_packing),
1558 xfb_enabled(xfb_enabled),
1559 enhanced_layouts_enabled(enhanced_layouts_enabled),
1560 producer_stage(producer_stage),
1561 consumer_stage(consumer_stage)
1562 {
1563 /* Note: this initial capacity is rather arbitrarily chosen to be large
1564 * enough for many cases without wasting an unreasonable amount of space.
1565 * varying_matches::record() will resize the array if there are more than
1566 * this number of varyings.
1567 */
1568 this->matches_capacity = 8;
1569 this->matches = (match *)
1570 malloc(sizeof(*this->matches) * this->matches_capacity);
1571 this->num_matches = 0;
1572 }
1573
1574
1575 varying_matches::~varying_matches()
1576 {
1577 free(this->matches);
1578 }
1579
1580
1581 /**
1582 * Packing is always safe on individual arrays, structures, and matrices. It
1583 * is also safe if the varying is only used for transform feedback.
1584 */
1585 bool
1586 varying_matches::is_varying_packing_safe(const glsl_type *type,
1587 const ir_variable *var)
1588 {
1589 if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1590 consumer_stage == MESA_SHADER_TESS_CTRL ||
1591 producer_stage == MESA_SHADER_TESS_CTRL)
1592 return false;
1593
1594 return xfb_enabled && (type->is_array() || type->is_record() ||
1595 type->is_matrix() || var->data.is_xfb_only);
1596 }
1597
1598
1599 /**
1600 * Record the given producer/consumer variable pair in the list of variables
1601 * that should later be assigned locations.
1602 *
1603 * It is permissible for \c consumer_var to be NULL (this happens if a
1604 * variable is output by the producer and consumed by transform feedback, but
1605 * not consumed by the consumer).
1606 *
1607 * If \c producer_var has already been paired up with a consumer_var, or
1608 * producer_var is part of fixed pipeline functionality (and hence already has
1609 * a location assigned), this function has no effect.
1610 *
1611 * Note: as a side effect this function may change the interpolation type of
1612 * \c producer_var, but only when the change couldn't possibly affect
1613 * rendering.
1614 */
1615 void
1616 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1617 {
1618 assert(producer_var != NULL || consumer_var != NULL);
1619
1620 if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1621 producer_var->data.explicit_location)) ||
1622 (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1623 consumer_var->data.explicit_location))) {
1624 /* Either a location already exists for this variable (since it is part
1625 * of fixed functionality), or it has already been recorded as part of a
1626 * previous match.
1627 */
1628 return;
1629 }
1630
1631 bool needs_flat_qualifier = consumer_var == NULL &&
1632 (producer_var->type->contains_integer() ||
1633 producer_var->type->contains_double());
1634
1635 if (!disable_varying_packing &&
1636 (needs_flat_qualifier ||
1637 (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1638 /* Since this varying is not being consumed by the fragment shader, its
1639 * interpolation type varying cannot possibly affect rendering.
1640 * Also, this variable is non-flat and is (or contains) an integer
1641 * or a double.
1642 * If the consumer stage is unknown, don't modify the interpolation
1643 * type as it could affect rendering later with separate shaders.
1644 *
1645 * lower_packed_varyings requires all integer varyings to flat,
1646 * regardless of where they appear. We can trivially satisfy that
1647 * requirement by changing the interpolation type to flat here.
1648 */
1649 if (producer_var) {
1650 producer_var->data.centroid = false;
1651 producer_var->data.sample = false;
1652 producer_var->data.interpolation = INTERP_MODE_FLAT;
1653 }
1654
1655 if (consumer_var) {
1656 consumer_var->data.centroid = false;
1657 consumer_var->data.sample = false;
1658 consumer_var->data.interpolation = INTERP_MODE_FLAT;
1659 }
1660 }
1661
1662 if (this->num_matches == this->matches_capacity) {
1663 this->matches_capacity *= 2;
1664 this->matches = (match *)
1665 realloc(this->matches,
1666 sizeof(*this->matches) * this->matches_capacity);
1667 }
1668
1669 /* We must use the consumer to compute the packing class because in GL4.4+
1670 * there is no guarantee interpolation qualifiers will match across stages.
1671 *
1672 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1673 *
1674 * "The type and presence of interpolation qualifiers of variables with
1675 * the same name declared in all linked shaders for the same cross-stage
1676 * interface must match, otherwise the link command will fail.
1677 *
1678 * When comparing an output from one stage to an input of a subsequent
1679 * stage, the input and output don't match if their interpolation
1680 * qualifiers (or lack thereof) are not the same."
1681 *
1682 * This text was also in at least revison 7 of the 4.40 spec but is no
1683 * longer in revision 9 and not in the 4.50 spec.
1684 */
1685 const ir_variable *const var = (consumer_var != NULL)
1686 ? consumer_var : producer_var;
1687 const gl_shader_stage stage = (consumer_var != NULL)
1688 ? consumer_stage : producer_stage;
1689 const glsl_type *type = get_varying_type(var, stage);
1690
1691 if (producer_var && consumer_var &&
1692 consumer_var->data.must_be_shader_input) {
1693 producer_var->data.must_be_shader_input = 1;
1694 }
1695
1696 this->matches[this->num_matches].packing_class
1697 = this->compute_packing_class(var);
1698 this->matches[this->num_matches].packing_order
1699 = this->compute_packing_order(var);
1700 if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
1701 var->data.must_be_shader_input) {
1702 unsigned slots = type->count_attribute_slots(false);
1703 this->matches[this->num_matches].num_components = slots * 4;
1704 } else {
1705 this->matches[this->num_matches].num_components
1706 = type->component_slots();
1707 }
1708
1709 this->matches[this->num_matches].producer_var = producer_var;
1710 this->matches[this->num_matches].consumer_var = consumer_var;
1711 this->num_matches++;
1712 if (producer_var)
1713 producer_var->data.is_unmatched_generic_inout = 0;
1714 if (consumer_var)
1715 consumer_var->data.is_unmatched_generic_inout = 0;
1716 }
1717
1718
1719 /**
1720 * Choose locations for all of the variable matches that were previously
1721 * passed to varying_matches::record().
1722 */
1723 unsigned
1724 varying_matches::assign_locations(struct gl_shader_program *prog,
1725 uint8_t *components,
1726 uint64_t reserved_slots)
1727 {
1728 /* If packing has been disabled then we cannot safely sort the varyings by
1729 * class as it may mean we are using a version of OpenGL where
1730 * interpolation qualifiers are not guaranteed to be matching across
1731 * shaders, sorting in this case could result in mismatching shader
1732 * interfaces.
1733 * When packing is disabled the sort orders varyings used by transform
1734 * feedback first, but also depends on *undefined behaviour* of qsort to
1735 * reverse the order of the varyings. See: xfb_comparator().
1736 */
1737 if (!this->disable_varying_packing) {
1738 /* Sort varying matches into an order that makes them easy to pack. */
1739 qsort(this->matches, this->num_matches, sizeof(*this->matches),
1740 &varying_matches::match_comparator);
1741 } else {
1742 /* Only sort varyings that are only used by transform feedback. */
1743 qsort(this->matches, this->num_matches, sizeof(*this->matches),
1744 &varying_matches::xfb_comparator);
1745 }
1746
1747 unsigned generic_location = 0;
1748 unsigned generic_patch_location = MAX_VARYING*4;
1749 bool previous_var_xfb_only = false;
1750
1751 for (unsigned i = 0; i < this->num_matches; i++) {
1752 unsigned *location = &generic_location;
1753
1754 const ir_variable *var;
1755 const glsl_type *type;
1756 bool is_vertex_input = false;
1757 if (matches[i].consumer_var) {
1758 var = matches[i].consumer_var;
1759 type = get_varying_type(var, consumer_stage);
1760 if (consumer_stage == MESA_SHADER_VERTEX)
1761 is_vertex_input = true;
1762 } else {
1763 var = matches[i].producer_var;
1764 type = get_varying_type(var, producer_stage);
1765 }
1766
1767 if (var->data.patch)
1768 location = &generic_patch_location;
1769
1770 /* Advance to the next slot if this varying has a different packing
1771 * class than the previous one, and we're not already on a slot
1772 * boundary.
1773 *
1774 * Also advance to the next slot if packing is disabled. This makes sure
1775 * we don't assign varyings the same locations which is possible
1776 * because we still pack individual arrays, records and matrices even
1777 * when packing is disabled. Note we don't advance to the next slot if
1778 * we can pack varyings together that are only used for transform
1779 * feedback.
1780 */
1781 if (var->data.must_be_shader_input ||
1782 (this->disable_varying_packing &&
1783 !(previous_var_xfb_only && var->data.is_xfb_only)) ||
1784 (i > 0 && this->matches[i - 1].packing_class
1785 != this->matches[i].packing_class )) {
1786 *location = ALIGN(*location, 4);
1787 }
1788
1789 previous_var_xfb_only = var->data.is_xfb_only;
1790
1791 /* The number of components taken up by this variable. For vertex shader
1792 * inputs, we use the number of slots * 4, as they have different
1793 * counting rules.
1794 */
1795 unsigned num_components = is_vertex_input ?
1796 type->count_attribute_slots(is_vertex_input) * 4 :
1797 this->matches[i].num_components;
1798
1799 /* The last slot for this variable, inclusive. */
1800 unsigned slot_end = *location + num_components - 1;
1801
1802 /* FIXME: We could be smarter in the below code and loop back over
1803 * trying to fill any locations that we skipped because we couldn't pack
1804 * the varying between an explicit location. For now just let the user
1805 * hit the linking error if we run out of room and suggest they use
1806 * explicit locations.
1807 */
1808 while (slot_end < MAX_VARYING * 4u) {
1809 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
1810 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
1811
1812 assert(slots > 0);
1813 if (reserved_slots & slot_mask) {
1814 *location = ALIGN(*location + 1, 4);
1815 slot_end = *location + num_components - 1;
1816 continue;
1817 }
1818
1819 break;
1820 }
1821
1822 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
1823 linker_error(prog, "insufficient contiguous locations available for "
1824 "%s it is possible an array or struct could not be "
1825 "packed between varyings with explicit locations. Try "
1826 "using an explicit location for arrays and structs.",
1827 var->name);
1828 }
1829
1830 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
1831 for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
1832 components[j] = 4;
1833 components[slot_end / 4u] = (slot_end & 3) + 1;
1834 }
1835
1836 this->matches[i].generic_location = *location;
1837
1838 *location = slot_end + 1;
1839 }
1840
1841 return (generic_location + 3) / 4;
1842 }
1843
1844
1845 /**
1846 * Update the producer and consumer shaders to reflect the locations
1847 * assignments that were made by varying_matches::assign_locations().
1848 */
1849 void
1850 varying_matches::store_locations() const
1851 {
1852 /* Check is location needs to be packed with lower_packed_varyings() or if
1853 * we can just use ARB_enhanced_layouts packing.
1854 */
1855 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
1856 const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
1857
1858 for (unsigned i = 0; i < this->num_matches; i++) {
1859 ir_variable *producer_var = this->matches[i].producer_var;
1860 ir_variable *consumer_var = this->matches[i].consumer_var;
1861 unsigned generic_location = this->matches[i].generic_location;
1862 unsigned slot = generic_location / 4;
1863 unsigned offset = generic_location % 4;
1864
1865 if (producer_var) {
1866 producer_var->data.location = VARYING_SLOT_VAR0 + slot;
1867 producer_var->data.location_frac = offset;
1868 }
1869
1870 if (consumer_var) {
1871 assert(consumer_var->data.location == -1);
1872 consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
1873 consumer_var->data.location_frac = offset;
1874 }
1875
1876 /* Find locations suitable for native packing via
1877 * ARB_enhanced_layouts.
1878 */
1879 if (producer_var && consumer_var) {
1880 if (enhanced_layouts_enabled) {
1881 const glsl_type *type =
1882 get_varying_type(producer_var, producer_stage);
1883 if (type->is_array() || type->is_matrix() || type->is_record() ||
1884 type->is_double()) {
1885 unsigned comp_slots = type->component_slots() + offset;
1886 unsigned slots = comp_slots / 4;
1887 if (comp_slots % 4)
1888 slots += 1;
1889
1890 for (unsigned j = 0; j < slots; j++) {
1891 pack_loc[slot + j] = true;
1892 }
1893 } else if (offset + type->vector_elements > 4) {
1894 pack_loc[slot] = true;
1895 pack_loc[slot + 1] = true;
1896 } else {
1897 loc_type[slot][offset] = type;
1898 }
1899 }
1900 }
1901 }
1902
1903 /* Attempt to use ARB_enhanced_layouts for more efficient packing if
1904 * suitable.
1905 */
1906 if (enhanced_layouts_enabled) {
1907 for (unsigned i = 0; i < this->num_matches; i++) {
1908 ir_variable *producer_var = this->matches[i].producer_var;
1909 ir_variable *consumer_var = this->matches[i].consumer_var;
1910 unsigned generic_location = this->matches[i].generic_location;
1911 unsigned slot = generic_location / 4;
1912
1913 if (pack_loc[slot] || !producer_var || !consumer_var)
1914 continue;
1915
1916 const glsl_type *type =
1917 get_varying_type(producer_var, producer_stage);
1918 bool type_match = true;
1919 for (unsigned j = 0; j < 4; j++) {
1920 if (loc_type[slot][j]) {
1921 if (type->base_type != loc_type[slot][j]->base_type)
1922 type_match = false;
1923 }
1924 }
1925
1926 if (type_match) {
1927 producer_var->data.explicit_location = 1;
1928 consumer_var->data.explicit_location = 1;
1929 producer_var->data.explicit_component = 1;
1930 consumer_var->data.explicit_component = 1;
1931 }
1932 }
1933 }
1934 }
1935
1936
1937 /**
1938 * Compute the "packing class" of the given varying. This is an unsigned
1939 * integer with the property that two variables in the same packing class can
1940 * be safely backed into the same vec4.
1941 */
1942 unsigned
1943 varying_matches::compute_packing_class(const ir_variable *var)
1944 {
1945 /* Without help from the back-end, there is no way to pack together
1946 * variables with different interpolation types, because
1947 * lower_packed_varyings must choose exactly one interpolation type for
1948 * each packed varying it creates.
1949 *
1950 * However, we can safely pack together floats, ints, and uints, because:
1951 *
1952 * - varyings of base type "int" and "uint" must use the "flat"
1953 * interpolation type, which can only occur in GLSL 1.30 and above.
1954 *
1955 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
1956 * can store flat floats as ints without losing any information (using
1957 * the ir_unop_bitcast_* opcodes).
1958 *
1959 * Therefore, the packing class depends only on the interpolation type.
1960 */
1961 unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
1962 (var->data.patch << 2) |
1963 (var->data.must_be_shader_input << 3);
1964 packing_class *= 8;
1965 packing_class += var->is_interpolation_flat()
1966 ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
1967 return packing_class;
1968 }
1969
1970
1971 /**
1972 * Compute the "packing order" of the given varying. This is a sort key we
1973 * use to determine when to attempt to pack the given varying relative to
1974 * other varyings in the same packing class.
1975 */
1976 varying_matches::packing_order_enum
1977 varying_matches::compute_packing_order(const ir_variable *var)
1978 {
1979 const glsl_type *element_type = var->type;
1980
1981 while (element_type->is_array()) {
1982 element_type = element_type->fields.array;
1983 }
1984
1985 switch (element_type->component_slots() % 4) {
1986 case 1: return PACKING_ORDER_SCALAR;
1987 case 2: return PACKING_ORDER_VEC2;
1988 case 3: return PACKING_ORDER_VEC3;
1989 case 0: return PACKING_ORDER_VEC4;
1990 default:
1991 assert(!"Unexpected value of vector_elements");
1992 return PACKING_ORDER_VEC4;
1993 }
1994 }
1995
1996
1997 /**
1998 * Comparison function passed to qsort() to sort varyings by packing_class and
1999 * then by packing_order.
2000 */
2001 int
2002 varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2003 {
2004 const match *x = (const match *) x_generic;
2005 const match *y = (const match *) y_generic;
2006
2007 if (x->packing_class != y->packing_class)
2008 return x->packing_class - y->packing_class;
2009 return x->packing_order - y->packing_order;
2010 }
2011
2012
2013 /**
2014 * Comparison function passed to qsort() to sort varyings used only by
2015 * transform feedback when packing of other varyings is disabled.
2016 */
2017 int
2018 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2019 {
2020 const match *x = (const match *) x_generic;
2021
2022 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2023 return match_comparator(x_generic, y_generic);
2024
2025 /* FIXME: When the comparator returns 0 it means the elements being
2026 * compared are equivalent. However the qsort documentation says:
2027 *
2028 * "The order of equivalent elements is undefined."
2029 *
2030 * In practice the sort ends up reversing the order of the varyings which
2031 * means locations are also assigned in this reversed order and happens to
2032 * be what we want. This is also whats happening in
2033 * varying_matches::match_comparator().
2034 */
2035 return 0;
2036 }
2037
2038
2039 /**
2040 * Is the given variable a varying variable to be counted against the
2041 * limit in ctx->Const.MaxVarying?
2042 * This includes variables such as texcoords, colors and generic
2043 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2044 */
2045 static bool
2046 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2047 {
2048 /* Only fragment shaders will take a varying variable as an input */
2049 if (stage == MESA_SHADER_FRAGMENT &&
2050 var->data.mode == ir_var_shader_in) {
2051 switch (var->data.location) {
2052 case VARYING_SLOT_POS:
2053 case VARYING_SLOT_FACE:
2054 case VARYING_SLOT_PNTC:
2055 return false;
2056 default:
2057 return true;
2058 }
2059 }
2060 return false;
2061 }
2062
2063
2064 /**
2065 * Visitor class that generates tfeedback_candidate structs describing all
2066 * possible targets of transform feedback.
2067 *
2068 * tfeedback_candidate structs are stored in the hash table
2069 * tfeedback_candidates, which is passed to the constructor. This hash table
2070 * maps varying names to instances of the tfeedback_candidate struct.
2071 */
2072 class tfeedback_candidate_generator : public program_resource_visitor
2073 {
2074 public:
2075 tfeedback_candidate_generator(void *mem_ctx,
2076 hash_table *tfeedback_candidates)
2077 : mem_ctx(mem_ctx),
2078 tfeedback_candidates(tfeedback_candidates),
2079 toplevel_var(NULL),
2080 varying_floats(0)
2081 {
2082 }
2083
2084 void process(ir_variable *var)
2085 {
2086 /* All named varying interface blocks should be flattened by now */
2087 assert(!var->is_interface_instance());
2088
2089 this->toplevel_var = var;
2090 this->varying_floats = 0;
2091 program_resource_visitor::process(var, false);
2092 }
2093
2094 private:
2095 virtual void visit_field(const glsl_type *type, const char *name,
2096 bool /* row_major */,
2097 const glsl_type * /* record_type */,
2098 const enum glsl_interface_packing,
2099 bool /* last_field */)
2100 {
2101 assert(!type->without_array()->is_record());
2102 assert(!type->without_array()->is_interface());
2103
2104 tfeedback_candidate *candidate
2105 = rzalloc(this->mem_ctx, tfeedback_candidate);
2106 candidate->toplevel_var = this->toplevel_var;
2107 candidate->type = type;
2108 candidate->offset = this->varying_floats;
2109 _mesa_hash_table_insert(this->tfeedback_candidates,
2110 ralloc_strdup(this->mem_ctx, name),
2111 candidate);
2112 this->varying_floats += type->component_slots();
2113 }
2114
2115 /**
2116 * Memory context used to allocate hash table keys and values.
2117 */
2118 void * const mem_ctx;
2119
2120 /**
2121 * Hash table in which tfeedback_candidate objects should be stored.
2122 */
2123 hash_table * const tfeedback_candidates;
2124
2125 /**
2126 * Pointer to the toplevel variable that is being traversed.
2127 */
2128 ir_variable *toplevel_var;
2129
2130 /**
2131 * Total number of varying floats that have been visited so far. This is
2132 * used to determine the offset to each varying within the toplevel
2133 * variable.
2134 */
2135 unsigned varying_floats;
2136 };
2137
2138
2139 namespace linker {
2140
2141 void
2142 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2143 hash_table *consumer_inputs,
2144 hash_table *consumer_interface_inputs,
2145 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2146 {
2147 memset(consumer_inputs_with_locations,
2148 0,
2149 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2150
2151 foreach_in_list(ir_instruction, node, ir) {
2152 ir_variable *const input_var = node->as_variable();
2153
2154 if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2155 /* All interface blocks should have been lowered by this point */
2156 assert(!input_var->type->is_interface());
2157
2158 if (input_var->data.explicit_location) {
2159 /* assign_varying_locations only cares about finding the
2160 * ir_variable at the start of a contiguous location block.
2161 *
2162 * - For !producer, consumer_inputs_with_locations isn't used.
2163 *
2164 * - For !consumer, consumer_inputs_with_locations is empty.
2165 *
2166 * For consumer && producer, if you were trying to set some
2167 * ir_variable to the middle of a location block on the other side
2168 * of producer/consumer, cross_validate_outputs_to_inputs() should
2169 * be link-erroring due to either type mismatch or location
2170 * overlaps. If the variables do match up, then they've got a
2171 * matching data.location and you only looked at
2172 * consumer_inputs_with_locations[var->data.location], not any
2173 * following entries for the array/structure.
2174 */
2175 consumer_inputs_with_locations[input_var->data.location] =
2176 input_var;
2177 } else if (input_var->get_interface_type() != NULL) {
2178 char *const iface_field_name =
2179 ralloc_asprintf(mem_ctx, "%s.%s",
2180 input_var->get_interface_type()->without_array()->name,
2181 input_var->name);
2182 _mesa_hash_table_insert(consumer_interface_inputs,
2183 iface_field_name, input_var);
2184 } else {
2185 _mesa_hash_table_insert(consumer_inputs,
2186 ralloc_strdup(mem_ctx, input_var->name),
2187 input_var);
2188 }
2189 }
2190 }
2191 }
2192
2193 /**
2194 * Find a variable from the consumer that "matches" the specified variable
2195 *
2196 * This function only finds inputs with names that match. There is no
2197 * validation (here) that the types, etc. are compatible.
2198 */
2199 ir_variable *
2200 get_matching_input(void *mem_ctx,
2201 const ir_variable *output_var,
2202 hash_table *consumer_inputs,
2203 hash_table *consumer_interface_inputs,
2204 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2205 {
2206 ir_variable *input_var;
2207
2208 if (output_var->data.explicit_location) {
2209 input_var = consumer_inputs_with_locations[output_var->data.location];
2210 } else if (output_var->get_interface_type() != NULL) {
2211 char *const iface_field_name =
2212 ralloc_asprintf(mem_ctx, "%s.%s",
2213 output_var->get_interface_type()->without_array()->name,
2214 output_var->name);
2215 hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2216 input_var = entry ? (ir_variable *) entry->data : NULL;
2217 } else {
2218 hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2219 input_var = entry ? (ir_variable *) entry->data : NULL;
2220 }
2221
2222 return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2223 ? NULL : input_var;
2224 }
2225
2226 }
2227
2228 static int
2229 io_variable_cmp(const void *_a, const void *_b)
2230 {
2231 const ir_variable *const a = *(const ir_variable **) _a;
2232 const ir_variable *const b = *(const ir_variable **) _b;
2233
2234 if (a->data.explicit_location && b->data.explicit_location)
2235 return b->data.location - a->data.location;
2236
2237 if (a->data.explicit_location && !b->data.explicit_location)
2238 return 1;
2239
2240 if (!a->data.explicit_location && b->data.explicit_location)
2241 return -1;
2242
2243 return -strcmp(a->name, b->name);
2244 }
2245
2246 /**
2247 * Sort the shader IO variables into canonical order
2248 */
2249 static void
2250 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2251 {
2252 ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2253 unsigned num_variables = 0;
2254
2255 foreach_in_list(ir_instruction, node, ir) {
2256 ir_variable *const var = node->as_variable();
2257
2258 if (var == NULL || var->data.mode != io_mode)
2259 continue;
2260
2261 /* If we have already encountered more I/O variables that could
2262 * successfully link, bail.
2263 */
2264 if (num_variables == ARRAY_SIZE(var_table))
2265 return;
2266
2267 var_table[num_variables++] = var;
2268 }
2269
2270 if (num_variables == 0)
2271 return;
2272
2273 /* Sort the list in reverse order (io_variable_cmp handles this). Later
2274 * we're going to push the variables on to the IR list as a stack, so we
2275 * want the last variable (in canonical order) to be first in the list.
2276 */
2277 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2278
2279 /* Remove the variable from it's current location in the IR, and put it at
2280 * the front.
2281 */
2282 for (unsigned i = 0; i < num_variables; i++) {
2283 var_table[i]->remove();
2284 ir->push_head(var_table[i]);
2285 }
2286 }
2287
2288 /**
2289 * Generate a bitfield map of the explicit locations for shader varyings.
2290 *
2291 * Note: For Tessellation shaders we are sitting right on the limits of the
2292 * 64 bit map. Per-vertex and per-patch both have separate location domains
2293 * with a max of MAX_VARYING.
2294 */
2295 static uint64_t
2296 reserved_varying_slot(struct gl_linked_shader *stage,
2297 ir_variable_mode io_mode)
2298 {
2299 assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2300 /* Avoid an overflow of the returned value */
2301 assert(MAX_VARYINGS_INCL_PATCH <= 64);
2302
2303 uint64_t slots = 0;
2304 int var_slot;
2305
2306 if (!stage)
2307 return slots;
2308
2309 foreach_in_list(ir_instruction, node, stage->ir) {
2310 ir_variable *const var = node->as_variable();
2311
2312 if (var == NULL || var->data.mode != io_mode ||
2313 !var->data.explicit_location ||
2314 var->data.location < VARYING_SLOT_VAR0)
2315 continue;
2316
2317 var_slot = var->data.location - VARYING_SLOT_VAR0;
2318
2319 unsigned num_elements = get_varying_type(var, stage->Stage)
2320 ->count_attribute_slots(io_mode == ir_var_shader_in &&
2321 stage->Stage == MESA_SHADER_VERTEX);
2322 for (unsigned i = 0; i < num_elements; i++) {
2323 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2324 slots |= UINT64_C(1) << var_slot;
2325 var_slot += 1;
2326 }
2327 }
2328
2329 return slots;
2330 }
2331
2332
2333 /**
2334 * Assign locations for all variables that are produced in one pipeline stage
2335 * (the "producer") and consumed in the next stage (the "consumer").
2336 *
2337 * Variables produced by the producer may also be consumed by transform
2338 * feedback.
2339 *
2340 * \param num_tfeedback_decls is the number of declarations indicating
2341 * variables that may be consumed by transform feedback.
2342 *
2343 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2344 * representing the result of parsing the strings passed to
2345 * glTransformFeedbackVaryings(). assign_location() will be called for
2346 * each of these objects that matches one of the outputs of the
2347 * producer.
2348 *
2349 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2350 * be NULL. In this case, varying locations are assigned solely based on the
2351 * requirements of transform feedback.
2352 */
2353 static bool
2354 assign_varying_locations(struct gl_context *ctx,
2355 void *mem_ctx,
2356 struct gl_shader_program *prog,
2357 gl_linked_shader *producer,
2358 gl_linked_shader *consumer,
2359 unsigned num_tfeedback_decls,
2360 tfeedback_decl *tfeedback_decls,
2361 const uint64_t reserved_slots)
2362 {
2363 /* Tessellation shaders treat inputs and outputs as shared memory and can
2364 * access inputs and outputs of other invocations.
2365 * Therefore, they can't be lowered to temps easily (and definitely not
2366 * efficiently).
2367 */
2368 bool unpackable_tess =
2369 (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2370 (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2371 (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2372
2373 /* Transform feedback code assumes varying arrays are packed, so if the
2374 * driver has disabled varying packing, make sure to at least enable
2375 * packing required by transform feedback.
2376 */
2377 bool xfb_enabled =
2378 ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2379
2380 /* Disable packing on outward facing interfaces for SSO because in ES we
2381 * need to retain the unpacked varying information for draw time
2382 * validation.
2383 *
2384 * Packing is still enabled on individual arrays, structs, and matrices as
2385 * these are required by the transform feedback code and it is still safe
2386 * to do so. We also enable packing when a varying is only used for
2387 * transform feedback and its not a SSO.
2388 */
2389 bool disable_varying_packing =
2390 ctx->Const.DisableVaryingPacking || unpackable_tess;
2391 if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2392 disable_varying_packing = true;
2393
2394 varying_matches matches(disable_varying_packing, xfb_enabled,
2395 ctx->Extensions.ARB_enhanced_layouts,
2396 producer ? producer->Stage : MESA_SHADER_NONE,
2397 consumer ? consumer->Stage : MESA_SHADER_NONE);
2398 hash_table *tfeedback_candidates =
2399 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2400 _mesa_key_string_equal);
2401 hash_table *consumer_inputs =
2402 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2403 _mesa_key_string_equal);
2404 hash_table *consumer_interface_inputs =
2405 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2406 _mesa_key_string_equal);
2407 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2408 NULL,
2409 };
2410
2411 unsigned consumer_vertices = 0;
2412 if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2413 consumer_vertices = prog->Geom.VerticesIn;
2414
2415 /* Operate in a total of four passes.
2416 *
2417 * 1. Sort inputs / outputs into a canonical order. This is necessary so
2418 * that inputs / outputs of separable shaders will be assigned
2419 * predictable locations regardless of the order in which declarations
2420 * appeared in the shader source.
2421 *
2422 * 2. Assign locations for any matching inputs and outputs.
2423 *
2424 * 3. Mark output variables in the producer that do not have locations as
2425 * not being outputs. This lets the optimizer eliminate them.
2426 *
2427 * 4. Mark input variables in the consumer that do not have locations as
2428 * not being inputs. This lets the optimizer eliminate them.
2429 */
2430 if (consumer)
2431 canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2432
2433 if (producer)
2434 canonicalize_shader_io(producer->ir, ir_var_shader_out);
2435
2436 if (consumer)
2437 linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2438 consumer_inputs,
2439 consumer_interface_inputs,
2440 consumer_inputs_with_locations);
2441
2442 if (producer) {
2443 foreach_in_list(ir_instruction, node, producer->ir) {
2444 ir_variable *const output_var = node->as_variable();
2445
2446 if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2447 continue;
2448
2449 /* Only geometry shaders can use non-zero streams */
2450 assert(output_var->data.stream == 0 ||
2451 (output_var->data.stream < MAX_VERTEX_STREAMS &&
2452 producer->Stage == MESA_SHADER_GEOMETRY));
2453
2454 if (num_tfeedback_decls > 0) {
2455 tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
2456 g.process(output_var);
2457 }
2458
2459 ir_variable *const input_var =
2460 linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2461 consumer_interface_inputs,
2462 consumer_inputs_with_locations);
2463
2464 /* If a matching input variable was found, add this output (and the
2465 * input) to the set. If this is a separable program and there is no
2466 * consumer stage, add the output.
2467 *
2468 * Always add TCS outputs. They are shared by all invocations
2469 * within a patch and can be used as shared memory.
2470 */
2471 if (input_var || (prog->SeparateShader && consumer == NULL) ||
2472 producer->Stage == MESA_SHADER_TESS_CTRL) {
2473 matches.record(output_var, input_var);
2474 }
2475
2476 /* Only stream 0 outputs can be consumed in the next stage */
2477 if (input_var && output_var->data.stream != 0) {
2478 linker_error(prog, "output %s is assigned to stream=%d but "
2479 "is linked to an input, which requires stream=0",
2480 output_var->name, output_var->data.stream);
2481 return false;
2482 }
2483 }
2484 } else {
2485 /* If there's no producer stage, then this must be a separable program.
2486 * For example, we may have a program that has just a fragment shader.
2487 * Later this program will be used with some arbitrary vertex (or
2488 * geometry) shader program. This means that locations must be assigned
2489 * for all the inputs.
2490 */
2491 foreach_in_list(ir_instruction, node, consumer->ir) {
2492 ir_variable *const input_var = node->as_variable();
2493
2494 if (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2495 continue;
2496
2497 matches.record(NULL, input_var);
2498 }
2499 }
2500
2501 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2502 if (!tfeedback_decls[i].is_varying())
2503 continue;
2504
2505 const tfeedback_candidate *matched_candidate
2506 = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2507
2508 if (matched_candidate == NULL) {
2509 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2510 return false;
2511 }
2512
2513 /* Mark xfb varyings as always active */
2514 matched_candidate->toplevel_var->data.always_active_io = 1;
2515
2516 /* Mark any corresponding inputs as always active also. We must do this
2517 * because we have a NIR pass that lowers vectors to scalars and another
2518 * that removes unused varyings.
2519 * We don't split varyings marked as always active because there is no
2520 * point in doing so. This means we need to mark both sides of the
2521 * interface as always active otherwise we will have a mismatch and
2522 * start removing things we shouldn't.
2523 */
2524 ir_variable *const input_var =
2525 linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2526 consumer_inputs,
2527 consumer_interface_inputs,
2528 consumer_inputs_with_locations);
2529 if (input_var)
2530 input_var->data.always_active_io = 1;
2531
2532 if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
2533 matched_candidate->toplevel_var->data.is_xfb_only = 1;
2534 matches.record(matched_candidate->toplevel_var, NULL);
2535 }
2536 }
2537
2538 _mesa_hash_table_destroy(consumer_inputs, NULL);
2539 _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
2540
2541 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
2542 const unsigned slots_used = matches.assign_locations(
2543 prog, components, reserved_slots);
2544 matches.store_locations();
2545
2546 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2547 if (!tfeedback_decls[i].is_varying())
2548 continue;
2549
2550 if (!tfeedback_decls[i].assign_location(ctx, prog)) {
2551 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2552 return false;
2553 }
2554 }
2555 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2556
2557 if (consumer && producer) {
2558 foreach_in_list(ir_instruction, node, consumer->ir) {
2559 ir_variable *const var = node->as_variable();
2560
2561 if (var && var->data.mode == ir_var_shader_in &&
2562 var->data.is_unmatched_generic_inout) {
2563 if (!prog->IsES && prog->data->Version <= 120) {
2564 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
2565 *
2566 * Only those varying variables used (i.e. read) in
2567 * the fragment shader executable must be written to
2568 * by the vertex shader executable; declaring
2569 * superfluous varying variables in a vertex shader is
2570 * permissible.
2571 *
2572 * We interpret this text as meaning that the VS must
2573 * write the variable for the FS to read it. See
2574 * "glsl1-varying read but not written" in piglit.
2575 */
2576 linker_error(prog, "%s shader varying %s not written "
2577 "by %s shader\n.",
2578 _mesa_shader_stage_to_string(consumer->Stage),
2579 var->name,
2580 _mesa_shader_stage_to_string(producer->Stage));
2581 } else {
2582 linker_warning(prog, "%s shader varying %s not written "
2583 "by %s shader\n.",
2584 _mesa_shader_stage_to_string(consumer->Stage),
2585 var->name,
2586 _mesa_shader_stage_to_string(producer->Stage));
2587 }
2588 }
2589 }
2590
2591 /* Now that validation is done its safe to remove unused varyings. As
2592 * we have both a producer and consumer its safe to remove unused
2593 * varyings even if the program is a SSO because the stages are being
2594 * linked together i.e. we have a multi-stage SSO.
2595 */
2596 remove_unused_shader_inputs_and_outputs(false, producer,
2597 ir_var_shader_out);
2598 remove_unused_shader_inputs_and_outputs(false, consumer,
2599 ir_var_shader_in);
2600 }
2601
2602 if (producer) {
2603 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
2604 0, producer, disable_varying_packing,
2605 xfb_enabled);
2606 }
2607
2608 if (consumer) {
2609 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
2610 consumer_vertices, consumer,
2611 disable_varying_packing, xfb_enabled);
2612 }
2613
2614 return true;
2615 }
2616
2617 static bool
2618 check_against_output_limit(struct gl_context *ctx,
2619 struct gl_shader_program *prog,
2620 gl_linked_shader *producer,
2621 unsigned num_explicit_locations)
2622 {
2623 unsigned output_vectors = num_explicit_locations;
2624
2625 foreach_in_list(ir_instruction, node, producer->ir) {
2626 ir_variable *const var = node->as_variable();
2627
2628 if (var && !var->data.explicit_location &&
2629 var->data.mode == ir_var_shader_out &&
2630 var_counts_against_varying_limit(producer->Stage, var)) {
2631 /* outputs for fragment shader can't be doubles */
2632 output_vectors += var->type->count_attribute_slots(false);
2633 }
2634 }
2635
2636 assert(producer->Stage != MESA_SHADER_FRAGMENT);
2637 unsigned max_output_components =
2638 ctx->Const.Program[producer->Stage].MaxOutputComponents;
2639
2640 const unsigned output_components = output_vectors * 4;
2641 if (output_components > max_output_components) {
2642 if (ctx->API == API_OPENGLES2 || prog->IsES)
2643 linker_error(prog, "%s shader uses too many output vectors "
2644 "(%u > %u)\n",
2645 _mesa_shader_stage_to_string(producer->Stage),
2646 output_vectors,
2647 max_output_components / 4);
2648 else
2649 linker_error(prog, "%s shader uses too many output components "
2650 "(%u > %u)\n",
2651 _mesa_shader_stage_to_string(producer->Stage),
2652 output_components,
2653 max_output_components);
2654
2655 return false;
2656 }
2657
2658 return true;
2659 }
2660
2661 static bool
2662 check_against_input_limit(struct gl_context *ctx,
2663 struct gl_shader_program *prog,
2664 gl_linked_shader *consumer,
2665 unsigned num_explicit_locations)
2666 {
2667 unsigned input_vectors = num_explicit_locations;
2668
2669 foreach_in_list(ir_instruction, node, consumer->ir) {
2670 ir_variable *const var = node->as_variable();
2671
2672 if (var && !var->data.explicit_location &&
2673 var->data.mode == ir_var_shader_in &&
2674 var_counts_against_varying_limit(consumer->Stage, var)) {
2675 /* vertex inputs aren't varying counted */
2676 input_vectors += var->type->count_attribute_slots(false);
2677 }
2678 }
2679
2680 assert(consumer->Stage != MESA_SHADER_VERTEX);
2681 unsigned max_input_components =
2682 ctx->Const.Program[consumer->Stage].MaxInputComponents;
2683
2684 const unsigned input_components = input_vectors * 4;
2685 if (input_components > max_input_components) {
2686 if (ctx->API == API_OPENGLES2 || prog->IsES)
2687 linker_error(prog, "%s shader uses too many input vectors "
2688 "(%u > %u)\n",
2689 _mesa_shader_stage_to_string(consumer->Stage),
2690 input_vectors,
2691 max_input_components / 4);
2692 else
2693 linker_error(prog, "%s shader uses too many input components "
2694 "(%u > %u)\n",
2695 _mesa_shader_stage_to_string(consumer->Stage),
2696 input_components,
2697 max_input_components);
2698
2699 return false;
2700 }
2701
2702 return true;
2703 }
2704
2705 bool
2706 link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
2707 struct gl_context *ctx, void *mem_ctx)
2708 {
2709 bool has_xfb_qualifiers = false;
2710 unsigned num_tfeedback_decls = 0;
2711 char **varying_names = NULL;
2712 tfeedback_decl *tfeedback_decls = NULL;
2713
2714 /* From the ARB_enhanced_layouts spec:
2715 *
2716 * "If the shader used to record output variables for transform feedback
2717 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
2718 * qualifiers, the values specified by TransformFeedbackVaryings are
2719 * ignored, and the set of variables captured for transform feedback is
2720 * instead derived from the specified layout qualifiers."
2721 */
2722 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
2723 /* Find last stage before fragment shader */
2724 if (prog->_LinkedShaders[i]) {
2725 has_xfb_qualifiers =
2726 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
2727 prog, &num_tfeedback_decls,
2728 &varying_names);
2729 break;
2730 }
2731 }
2732
2733 if (!has_xfb_qualifiers) {
2734 num_tfeedback_decls = prog->TransformFeedback.NumVarying;
2735 varying_names = prog->TransformFeedback.VaryingNames;
2736 }
2737
2738 if (num_tfeedback_decls != 0) {
2739 /* From GL_EXT_transform_feedback:
2740 * A program will fail to link if:
2741 *
2742 * * the <count> specified by TransformFeedbackVaryingsEXT is
2743 * non-zero, but the program object has no vertex or geometry
2744 * shader;
2745 */
2746 if (first >= MESA_SHADER_FRAGMENT) {
2747 linker_error(prog, "Transform feedback varyings specified, but "
2748 "no vertex, tessellation, or geometry shader is "
2749 "present.\n");
2750 return false;
2751 }
2752
2753 tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
2754 num_tfeedback_decls);
2755 if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
2756 varying_names, tfeedback_decls))
2757 return false;
2758 }
2759
2760 /* If there is no fragment shader we need to set transform feedback.
2761 *
2762 * For SSO we also need to assign output locations. We assign them here
2763 * because we need to do it for both single stage programs and multi stage
2764 * programs.
2765 */
2766 if (last < MESA_SHADER_FRAGMENT &&
2767 (num_tfeedback_decls != 0 || prog->SeparateShader)) {
2768 const uint64_t reserved_out_slots =
2769 reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
2770 if (!assign_varying_locations(ctx, mem_ctx, prog,
2771 prog->_LinkedShaders[last], NULL,
2772 num_tfeedback_decls, tfeedback_decls,
2773 reserved_out_slots))
2774 return false;
2775 }
2776
2777 if (last <= MESA_SHADER_FRAGMENT) {
2778 /* Remove unused varyings from the first/last stage unless SSO */
2779 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2780 prog->_LinkedShaders[first],
2781 ir_var_shader_in);
2782 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2783 prog->_LinkedShaders[last],
2784 ir_var_shader_out);
2785
2786 /* If the program is made up of only a single stage */
2787 if (first == last) {
2788 gl_linked_shader *const sh = prog->_LinkedShaders[last];
2789
2790 do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
2791 do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
2792 tfeedback_decls);
2793
2794 if (prog->SeparateShader) {
2795 const uint64_t reserved_slots =
2796 reserved_varying_slot(sh, ir_var_shader_in);
2797
2798 /* Assign input locations for SSO, output locations are already
2799 * assigned.
2800 */
2801 if (!assign_varying_locations(ctx, mem_ctx, prog,
2802 NULL /* producer */,
2803 sh /* consumer */,
2804 0 /* num_tfeedback_decls */,
2805 NULL /* tfeedback_decls */,
2806 reserved_slots))
2807 return false;
2808 }
2809 } else {
2810 /* Linking the stages in the opposite order (from fragment to vertex)
2811 * ensures that inter-shader outputs written to in an earlier stage
2812 * are eliminated if they are (transitively) not used in a later
2813 * stage.
2814 */
2815 int next = last;
2816 for (int i = next - 1; i >= 0; i--) {
2817 if (prog->_LinkedShaders[i] == NULL && i != 0)
2818 continue;
2819
2820 gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
2821 gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
2822
2823 const uint64_t reserved_out_slots =
2824 reserved_varying_slot(sh_i, ir_var_shader_out);
2825 const uint64_t reserved_in_slots =
2826 reserved_varying_slot(sh_next, ir_var_shader_in);
2827
2828 do_dead_builtin_varyings(ctx, sh_i, sh_next,
2829 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2830 tfeedback_decls);
2831
2832 if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
2833 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2834 tfeedback_decls,
2835 reserved_out_slots | reserved_in_slots))
2836 return false;
2837
2838 /* This must be done after all dead varyings are eliminated. */
2839 if (sh_i != NULL) {
2840 unsigned slots_used = _mesa_bitcount_64(reserved_out_slots);
2841 if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
2842 return false;
2843 }
2844 }
2845
2846 unsigned slots_used = _mesa_bitcount_64(reserved_in_slots);
2847 if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
2848 return false;
2849
2850 next = i;
2851 }
2852 }
2853 }
2854
2855 if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
2856 has_xfb_qualifiers))
2857 return false;
2858
2859 return true;
2860 }