glsl: Transform fb buffers are only active if a variable uses them
[mesa.git] / src / compiler / glsl / link_varyings.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file link_varyings.cpp
26 *
27 * Linker functions related specifically to linking varyings between shader
28 * stages.
29 */
30
31
32 #include "main/mtypes.h"
33 #include "glsl_symbol_table.h"
34 #include "glsl_parser_extras.h"
35 #include "ir_optimization.h"
36 #include "linker.h"
37 #include "link_varyings.h"
38 #include "main/macros.h"
39 #include "util/hash_table.h"
40 #include "program.h"
41
42
43 /**
44 * Get the varying type stripped of the outermost array if we're processing
45 * a stage whose varyings are arrays indexed by a vertex number (such as
46 * geometry shader inputs).
47 */
48 static const glsl_type *
49 get_varying_type(const ir_variable *var, gl_shader_stage stage)
50 {
51 const glsl_type *type = var->type;
52
53 if (!var->data.patch &&
54 ((var->data.mode == ir_var_shader_out &&
55 stage == MESA_SHADER_TESS_CTRL) ||
56 (var->data.mode == ir_var_shader_in &&
57 (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
58 stage == MESA_SHADER_GEOMETRY)))) {
59 assert(type->is_array());
60 type = type->fields.array;
61 }
62
63 return type;
64 }
65
66 static void
67 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
68 size_t name_length, unsigned *count,
69 const char *ifc_member_name,
70 const glsl_type *ifc_member_t, char ***varying_names)
71 {
72 if (t->is_interface()) {
73 size_t new_length = name_length;
74
75 assert(ifc_member_name && ifc_member_t);
76 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
77
78 create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
79 NULL, NULL, varying_names);
80 } else if (t->is_record()) {
81 for (unsigned i = 0; i < t->length; i++) {
82 const char *field = t->fields.structure[i].name;
83 size_t new_length = name_length;
84
85 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
86
87 create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
88 new_length, count, NULL, NULL,
89 varying_names);
90 }
91 } else if (t->without_array()->is_record() ||
92 t->without_array()->is_interface() ||
93 (t->is_array() && t->fields.array->is_array())) {
94 for (unsigned i = 0; i < t->length; i++) {
95 size_t new_length = name_length;
96
97 /* Append the subscript to the current variable name */
98 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
99
100 create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
101 count, ifc_member_name, ifc_member_t,
102 varying_names);
103 }
104 } else {
105 (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
106 }
107 }
108
109 static bool
110 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
111 struct gl_shader_program *prog,
112 unsigned *num_tfeedback_decls,
113 char ***varying_names)
114 {
115 bool has_xfb_qualifiers = false;
116
117 /* We still need to enable transform feedback mode even if xfb_stride is
118 * only applied to a global out. Also we don't bother to propagate
119 * xfb_stride to interface block members so this will catch that case also.
120 */
121 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
122 if (prog->TransformFeedback.BufferStride[j]) {
123 has_xfb_qualifiers = true;
124 break;
125 }
126 }
127
128 foreach_in_list(ir_instruction, node, sh->ir) {
129 ir_variable *var = node->as_variable();
130 if (!var || var->data.mode != ir_var_shader_out)
131 continue;
132
133 /* From the ARB_enhanced_layouts spec:
134 *
135 * "Any shader making any static use (after preprocessing) of any of
136 * these *xfb_* qualifiers will cause the shader to be in a
137 * transform feedback capturing mode and hence responsible for
138 * describing the transform feedback setup. This mode will capture
139 * any output selected by *xfb_offset*, directly or indirectly, to
140 * a transform feedback buffer."
141 */
142 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
143 has_xfb_qualifiers = true;
144 }
145
146 if (var->data.explicit_xfb_offset) {
147 *num_tfeedback_decls += var->type->varying_count();
148 has_xfb_qualifiers = true;
149 }
150 }
151
152 if (*num_tfeedback_decls == 0)
153 return has_xfb_qualifiers;
154
155 unsigned i = 0;
156 *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
157 foreach_in_list(ir_instruction, node, sh->ir) {
158 ir_variable *var = node->as_variable();
159 if (!var || var->data.mode != ir_var_shader_out)
160 continue;
161
162 if (var->data.explicit_xfb_offset) {
163 char *name;
164 const glsl_type *type, *member_type;
165
166 if (var->data.from_named_ifc_block) {
167 type = var->get_interface_type();
168 /* Find the member type before it was altered by lowering */
169 member_type =
170 type->fields.structure[type->field_index(var->name)].type;
171 name = ralloc_strdup(NULL, type->without_array()->name);
172 } else {
173 type = var->type;
174 member_type = NULL;
175 name = ralloc_strdup(NULL, var->name);
176 }
177 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
178 var->name, member_type, varying_names);
179 ralloc_free(name);
180 }
181 }
182
183 assert(i == *num_tfeedback_decls);
184 return has_xfb_qualifiers;
185 }
186
187 /**
188 * Validate the types and qualifiers of an output from one stage against the
189 * matching input to another stage.
190 */
191 static void
192 cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
193 const ir_variable *input,
194 const ir_variable *output,
195 gl_shader_stage consumer_stage,
196 gl_shader_stage producer_stage)
197 {
198 /* Check that the types match between stages.
199 */
200 const glsl_type *type_to_match = input->type;
201
202 /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
203 const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
204 consumer_stage != MESA_SHADER_FRAGMENT) ||
205 consumer_stage == MESA_SHADER_GEOMETRY;
206 if (extra_array_level) {
207 assert(type_to_match->is_array());
208 type_to_match = type_to_match->fields.array;
209 }
210
211 if (type_to_match != output->type) {
212 /* There is a bit of a special case for gl_TexCoord. This
213 * built-in is unsized by default. Applications that variable
214 * access it must redeclare it with a size. There is some
215 * language in the GLSL spec that implies the fragment shader
216 * and vertex shader do not have to agree on this size. Other
217 * driver behave this way, and one or two applications seem to
218 * rely on it.
219 *
220 * Neither declaration needs to be modified here because the array
221 * sizes are fixed later when update_array_sizes is called.
222 *
223 * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
224 *
225 * "Unlike user-defined varying variables, the built-in
226 * varying variables don't have a strict one-to-one
227 * correspondence between the vertex language and the
228 * fragment language."
229 */
230 if (!output->type->is_array() || !is_gl_identifier(output->name)) {
231 linker_error(prog,
232 "%s shader output `%s' declared as type `%s', "
233 "but %s shader input declared as type `%s'\n",
234 _mesa_shader_stage_to_string(producer_stage),
235 output->name,
236 output->type->name,
237 _mesa_shader_stage_to_string(consumer_stage),
238 input->type->name);
239 return;
240 }
241 }
242
243 /* Check that all of the qualifiers match between stages.
244 */
245
246 /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
247 * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
248 * conformance test suite does not verify that the qualifiers must match.
249 * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
250 * OpenGLES 3.0 drivers, so we relax the checking in all cases.
251 */
252 if (false /* always skip the centroid check */ &&
253 prog->data->Version < (prog->IsES ? 310 : 430) &&
254 input->data.centroid != output->data.centroid) {
255 linker_error(prog,
256 "%s shader output `%s' %s centroid qualifier, "
257 "but %s shader input %s centroid qualifier\n",
258 _mesa_shader_stage_to_string(producer_stage),
259 output->name,
260 (output->data.centroid) ? "has" : "lacks",
261 _mesa_shader_stage_to_string(consumer_stage),
262 (input->data.centroid) ? "has" : "lacks");
263 return;
264 }
265
266 if (input->data.sample != output->data.sample) {
267 linker_error(prog,
268 "%s shader output `%s' %s sample qualifier, "
269 "but %s shader input %s sample qualifier\n",
270 _mesa_shader_stage_to_string(producer_stage),
271 output->name,
272 (output->data.sample) ? "has" : "lacks",
273 _mesa_shader_stage_to_string(consumer_stage),
274 (input->data.sample) ? "has" : "lacks");
275 return;
276 }
277
278 if (input->data.patch != output->data.patch) {
279 linker_error(prog,
280 "%s shader output `%s' %s patch qualifier, "
281 "but %s shader input %s patch qualifier\n",
282 _mesa_shader_stage_to_string(producer_stage),
283 output->name,
284 (output->data.patch) ? "has" : "lacks",
285 _mesa_shader_stage_to_string(consumer_stage),
286 (input->data.patch) ? "has" : "lacks");
287 return;
288 }
289
290 /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
291 *
292 * "As only outputs need be declared with invariant, an output from
293 * one shader stage will still match an input of a subsequent stage
294 * without the input being declared as invariant."
295 *
296 * while GLSL 4.20 says:
297 *
298 * "For variables leaving one shader and coming into another shader,
299 * the invariant keyword has to be used in both shaders, or a link
300 * error will result."
301 *
302 * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
303 *
304 * "The invariance of varyings that are declared in both the vertex
305 * and fragment shaders must match."
306 */
307 if (input->data.invariant != output->data.invariant &&
308 prog->data->Version < (prog->IsES ? 300 : 430)) {
309 linker_error(prog,
310 "%s shader output `%s' %s invariant qualifier, "
311 "but %s shader input %s invariant qualifier\n",
312 _mesa_shader_stage_to_string(producer_stage),
313 output->name,
314 (output->data.invariant) ? "has" : "lacks",
315 _mesa_shader_stage_to_string(consumer_stage),
316 (input->data.invariant) ? "has" : "lacks");
317 return;
318 }
319
320 /* GLSL >= 4.40 removes text requiring interpolation qualifiers
321 * to match cross stage, they must only match within the same stage.
322 *
323 * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
324 *
325 * "It is a link-time error if, within the same stage, the interpolation
326 * qualifiers of variables of the same name do not match.
327 *
328 * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
329 *
330 * "When no interpolation qualifier is present, smooth interpolation
331 * is used."
332 *
333 * So we match variables where one is smooth and the other has no explicit
334 * qualifier.
335 */
336 unsigned input_interpolation = input->data.interpolation;
337 unsigned output_interpolation = output->data.interpolation;
338 if (prog->IsES) {
339 if (input_interpolation == INTERP_MODE_NONE)
340 input_interpolation = INTERP_MODE_SMOOTH;
341 if (output_interpolation == INTERP_MODE_NONE)
342 output_interpolation = INTERP_MODE_SMOOTH;
343 }
344 if (input_interpolation != output_interpolation &&
345 prog->data->Version < 440) {
346 linker_error(prog,
347 "%s shader output `%s' specifies %s "
348 "interpolation qualifier, "
349 "but %s shader input specifies %s "
350 "interpolation qualifier\n",
351 _mesa_shader_stage_to_string(producer_stage),
352 output->name,
353 interpolation_string(output->data.interpolation),
354 _mesa_shader_stage_to_string(consumer_stage),
355 interpolation_string(input->data.interpolation));
356 return;
357 }
358 }
359
360 /**
361 * Validate front and back color outputs against single color input
362 */
363 static void
364 cross_validate_front_and_back_color(struct gl_shader_program *prog,
365 const ir_variable *input,
366 const ir_variable *front_color,
367 const ir_variable *back_color,
368 gl_shader_stage consumer_stage,
369 gl_shader_stage producer_stage)
370 {
371 if (front_color != NULL && front_color->data.assigned)
372 cross_validate_types_and_qualifiers(prog, input, front_color,
373 consumer_stage, producer_stage);
374
375 if (back_color != NULL && back_color->data.assigned)
376 cross_validate_types_and_qualifiers(prog, input, back_color,
377 consumer_stage, producer_stage);
378 }
379
380 static unsigned
381 compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
382 {
383 unsigned location_start = VARYING_SLOT_VAR0;
384
385 switch (stage) {
386 case MESA_SHADER_VERTEX:
387 if (var->data.mode == ir_var_shader_in)
388 location_start = VERT_ATTRIB_GENERIC0;
389 break;
390 case MESA_SHADER_TESS_CTRL:
391 case MESA_SHADER_TESS_EVAL:
392 if (var->data.patch)
393 location_start = VARYING_SLOT_PATCH0;
394 break;
395 case MESA_SHADER_FRAGMENT:
396 if (var->data.mode == ir_var_shader_out)
397 location_start = FRAG_RESULT_DATA0;
398 break;
399 default:
400 break;
401 }
402
403 return var->data.location - location_start;
404 }
405
406 struct explicit_location_info {
407 ir_variable *var;
408 unsigned numerical_type;
409 unsigned interpolation;
410 bool centroid;
411 bool sample;
412 bool patch;
413 };
414
415 static inline unsigned
416 get_numerical_type(const glsl_type *type)
417 {
418 /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
419 * (Location aliasing):
420 *
421 * "Further, when location aliasing, the aliases sharing the location
422 * must have the same underlying numerical type (floating-point or
423 * integer)
424 */
425 if (type->is_float() || type->is_double())
426 return GLSL_TYPE_FLOAT;
427 return GLSL_TYPE_INT;
428 }
429
430 static bool
431 check_location_aliasing(struct explicit_location_info explicit_locations[][4],
432 ir_variable *var,
433 unsigned location,
434 unsigned component,
435 unsigned location_limit,
436 const glsl_type *type,
437 unsigned interpolation,
438 bool centroid,
439 bool sample,
440 bool patch,
441 gl_shader_program *prog,
442 gl_shader_stage stage)
443 {
444 unsigned last_comp;
445 if (type->without_array()->is_record()) {
446 /* The component qualifier can't be used on structs so just treat
447 * all component slots as used.
448 */
449 last_comp = 4;
450 } else {
451 unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
452 last_comp = component + type->without_array()->vector_elements * dmul;
453 }
454
455 while (location < location_limit) {
456 unsigned comp = 0;
457 while (comp < 4) {
458 struct explicit_location_info *info =
459 &explicit_locations[location][comp];
460
461 if (info->var) {
462 /* Component aliasing is not alloed */
463 if (comp >= component && comp < last_comp) {
464 linker_error(prog,
465 "%s shader has multiple outputs explicitly "
466 "assigned to location %d and component %d\n",
467 _mesa_shader_stage_to_string(stage),
468 location, comp);
469 return false;
470 } else {
471 /* For all other used components we need to have matching
472 * types, interpolation and auxiliary storage
473 */
474 if (info->numerical_type !=
475 get_numerical_type(type->without_array())) {
476 linker_error(prog,
477 "Varyings sharing the same location must "
478 "have the same underlying numerical type. "
479 "Location %u component %u\n",
480 location, comp);
481 return false;
482 }
483
484 if (info->interpolation != interpolation) {
485 linker_error(prog,
486 "%s shader has multiple outputs at explicit "
487 "location %u with different interpolation "
488 "settings\n",
489 _mesa_shader_stage_to_string(stage), location);
490 return false;
491 }
492
493 if (info->centroid != centroid ||
494 info->sample != sample ||
495 info->patch != patch) {
496 linker_error(prog,
497 "%s shader has multiple outputs at explicit "
498 "location %u with different aux storage\n",
499 _mesa_shader_stage_to_string(stage), location);
500 return false;
501 }
502 }
503 } else if (comp >= component && comp < last_comp) {
504 info->var = var;
505 info->numerical_type = get_numerical_type(type->without_array());
506 info->interpolation = interpolation;
507 info->centroid = centroid;
508 info->sample = sample;
509 info->patch = patch;
510 }
511
512 comp++;
513
514 /* We need to do some special handling for doubles as dvec3 and
515 * dvec4 consume two consecutive locations. We don't need to
516 * worry about components beginning at anything other than 0 as
517 * the spec does not allow this for dvec3 and dvec4.
518 */
519 if (comp == 4 && last_comp > 4) {
520 last_comp = last_comp - 4;
521 /* Bump location index and reset the component index */
522 location++;
523 comp = 0;
524 component = 0;
525 }
526 }
527
528 location++;
529 }
530
531 return true;
532 }
533
534 static bool
535 validate_explicit_variable_location(struct gl_context *ctx,
536 struct explicit_location_info explicit_locations[][4],
537 ir_variable *var,
538 gl_shader_program *prog,
539 gl_linked_shader *sh)
540 {
541 const glsl_type *type = get_varying_type(var, sh->Stage);
542 unsigned num_elements = type->count_attribute_slots(false);
543 unsigned idx = compute_variable_location_slot(var, sh->Stage);
544 unsigned slot_limit = idx + num_elements;
545
546 /* Vertex shader inputs and fragment shader outputs are validated in
547 * assign_attribute_or_color_locations() so we should not attempt to
548 * validate them again here.
549 */
550 unsigned slot_max;
551 if (var->data.mode == ir_var_shader_out) {
552 assert(sh->Stage != MESA_SHADER_FRAGMENT);
553 slot_max =
554 ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
555 } else {
556 assert(var->data.mode == ir_var_shader_in);
557 assert(sh->Stage != MESA_SHADER_VERTEX);
558 slot_max =
559 ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
560 }
561
562 if (slot_limit > slot_max) {
563 linker_error(prog,
564 "Invalid location %u in %s shader\n",
565 idx, _mesa_shader_stage_to_string(sh->Stage));
566 return false;
567 }
568
569 if (type->without_array()->is_interface()) {
570 for (unsigned i = 0; i < type->without_array()->length; i++) {
571 glsl_struct_field *field = &type->fields.structure[i];
572 unsigned field_location = field->location -
573 (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
574 if (!check_location_aliasing(explicit_locations, var,
575 field_location,
576 0, field_location + 1,
577 field->type,
578 field->interpolation,
579 field->centroid,
580 field->sample,
581 field->patch,
582 prog, sh->Stage)) {
583 return false;
584 }
585 }
586 } else if (!check_location_aliasing(explicit_locations, var,
587 idx, var->data.location_frac,
588 slot_limit, type,
589 var->data.interpolation,
590 var->data.centroid,
591 var->data.sample,
592 var->data.patch,
593 prog, sh->Stage)) {
594 return false;
595 }
596
597 return true;
598 }
599
600 /**
601 * Validate explicit locations for the inputs to the first stage and the
602 * outputs of the last stage in an SSO program (everything in between is
603 * validated in cross_validate_outputs_to_inputs).
604 */
605 void
606 validate_sso_explicit_locations(struct gl_context *ctx,
607 struct gl_shader_program *prog,
608 gl_shader_stage first_stage,
609 gl_shader_stage last_stage)
610 {
611 assert(prog->SeparateShader);
612
613 /* VS inputs and FS outputs are validated in
614 * assign_attribute_or_color_locations()
615 */
616 bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
617 bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
618 if (!validate_first_stage && !validate_last_stage)
619 return;
620
621 struct explicit_location_info explicit_locations[MAX_VARYING][4];
622
623 gl_shader_stage stages[2] = { first_stage, last_stage };
624 bool validate_stage[2] = { validate_first_stage, validate_last_stage };
625 ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
626
627 for (unsigned i = 0; i < 2; i++) {
628 if (!validate_stage[i])
629 continue;
630
631 gl_shader_stage stage = stages[i];
632
633 gl_linked_shader *sh = prog->_LinkedShaders[stage];
634 assert(sh);
635
636 memset(explicit_locations, 0, sizeof(explicit_locations));
637
638 foreach_in_list(ir_instruction, node, sh->ir) {
639 ir_variable *const var = node->as_variable();
640
641 if (var == NULL ||
642 !var->data.explicit_location ||
643 var->data.location < VARYING_SLOT_VAR0 ||
644 var->data.mode != var_direction[i])
645 continue;
646
647 if (!validate_explicit_variable_location(
648 ctx, explicit_locations, var, prog, sh)) {
649 return;
650 }
651 }
652 }
653 }
654
655 /**
656 * Validate that outputs from one stage match inputs of another
657 */
658 void
659 cross_validate_outputs_to_inputs(struct gl_context *ctx,
660 struct gl_shader_program *prog,
661 gl_linked_shader *producer,
662 gl_linked_shader *consumer)
663 {
664 glsl_symbol_table parameters;
665 struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
666
667 /* Find all shader outputs in the "producer" stage.
668 */
669 foreach_in_list(ir_instruction, node, producer->ir) {
670 ir_variable *const var = node->as_variable();
671
672 if (var == NULL || var->data.mode != ir_var_shader_out)
673 continue;
674
675 if (!var->data.explicit_location
676 || var->data.location < VARYING_SLOT_VAR0)
677 parameters.add_variable(var);
678 else {
679 /* User-defined varyings with explicit locations are handled
680 * differently because they do not need to have matching names.
681 */
682 if (!validate_explicit_variable_location(ctx,
683 explicit_locations,
684 var, prog, producer)) {
685 return;
686 }
687 }
688 }
689
690
691 /* Find all shader inputs in the "consumer" stage. Any variables that have
692 * matching outputs already in the symbol table must have the same type and
693 * qualifiers.
694 *
695 * Exception: if the consumer is the geometry shader, then the inputs
696 * should be arrays and the type of the array element should match the type
697 * of the corresponding producer output.
698 */
699 foreach_in_list(ir_instruction, node, consumer->ir) {
700 ir_variable *const input = node->as_variable();
701
702 if (input == NULL || input->data.mode != ir_var_shader_in)
703 continue;
704
705 if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
706 const ir_variable *const front_color =
707 parameters.get_variable("gl_FrontColor");
708
709 const ir_variable *const back_color =
710 parameters.get_variable("gl_BackColor");
711
712 cross_validate_front_and_back_color(prog, input,
713 front_color, back_color,
714 consumer->Stage, producer->Stage);
715 } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
716 const ir_variable *const front_color =
717 parameters.get_variable("gl_FrontSecondaryColor");
718
719 const ir_variable *const back_color =
720 parameters.get_variable("gl_BackSecondaryColor");
721
722 cross_validate_front_and_back_color(prog, input,
723 front_color, back_color,
724 consumer->Stage, producer->Stage);
725 } else {
726 /* The rules for connecting inputs and outputs change in the presence
727 * of explicit locations. In this case, we no longer care about the
728 * names of the variables. Instead, we care only about the
729 * explicitly assigned location.
730 */
731 ir_variable *output = NULL;
732 if (input->data.explicit_location
733 && input->data.location >= VARYING_SLOT_VAR0) {
734
735 const glsl_type *type = get_varying_type(input, consumer->Stage);
736 unsigned num_elements = type->count_attribute_slots(false);
737 unsigned idx =
738 compute_variable_location_slot(input, consumer->Stage);
739 unsigned slot_limit = idx + num_elements;
740
741 while (idx < slot_limit) {
742 if (idx >= MAX_VARYING) {
743 linker_error(prog,
744 "Invalid location %u in %s shader\n", idx,
745 _mesa_shader_stage_to_string(consumer->Stage));
746 return;
747 }
748
749 output = explicit_locations[idx][input->data.location_frac].var;
750
751 if (output == NULL ||
752 input->data.location != output->data.location) {
753 linker_error(prog,
754 "%s shader input `%s' with explicit location "
755 "has no matching output\n",
756 _mesa_shader_stage_to_string(consumer->Stage),
757 input->name);
758 break;
759 }
760 idx++;
761 }
762 } else {
763 output = parameters.get_variable(input->name);
764 }
765
766 if (output != NULL) {
767 /* Interface blocks have their own validation elsewhere so don't
768 * try validating them here.
769 */
770 if (!(input->get_interface_type() &&
771 output->get_interface_type()))
772 cross_validate_types_and_qualifiers(prog, input, output,
773 consumer->Stage,
774 producer->Stage);
775 } else {
776 /* Check for input vars with unmatched output vars in prev stage
777 * taking into account that interface blocks could have a matching
778 * output but with different name, so we ignore them.
779 */
780 assert(!input->data.assigned);
781 if (input->data.used && !input->get_interface_type() &&
782 !input->data.explicit_location && !prog->SeparateShader)
783 linker_error(prog,
784 "%s shader input `%s' "
785 "has no matching output in the previous stage\n",
786 _mesa_shader_stage_to_string(consumer->Stage),
787 input->name);
788 }
789 }
790 }
791 }
792
793 /**
794 * Demote shader inputs and outputs that are not used in other stages, and
795 * remove them via dead code elimination.
796 */
797 static void
798 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
799 gl_linked_shader *sh,
800 enum ir_variable_mode mode)
801 {
802 if (is_separate_shader_object)
803 return;
804
805 foreach_in_list(ir_instruction, node, sh->ir) {
806 ir_variable *const var = node->as_variable();
807
808 if (var == NULL || var->data.mode != int(mode))
809 continue;
810
811 /* A shader 'in' or 'out' variable is only really an input or output if
812 * its value is used by other shader stages. This will cause the
813 * variable to have a location assigned.
814 */
815 if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
816 assert(var->data.mode != ir_var_temporary);
817
818 /* Assign zeros to demoted inputs to allow more optimizations. */
819 if (var->data.mode == ir_var_shader_in && !var->constant_value)
820 var->constant_value = ir_constant::zero(var, var->type);
821
822 var->data.mode = ir_var_auto;
823 }
824 }
825
826 /* Eliminate code that is now dead due to unused inputs/outputs being
827 * demoted.
828 */
829 while (do_dead_code(sh->ir, false))
830 ;
831
832 }
833
834 /**
835 * Initialize this object based on a string that was passed to
836 * glTransformFeedbackVaryings.
837 *
838 * If the input is mal-formed, this call still succeeds, but it sets
839 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
840 * will fail to find any matching variable.
841 */
842 void
843 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
844 const char *input)
845 {
846 /* We don't have to be pedantic about what is a valid GLSL variable name,
847 * because any variable with an invalid name can't exist in the IR anyway.
848 */
849
850 this->location = -1;
851 this->orig_name = input;
852 this->lowered_builtin_array_variable = none;
853 this->skip_components = 0;
854 this->next_buffer_separator = false;
855 this->matched_candidate = NULL;
856 this->stream_id = 0;
857 this->buffer = 0;
858 this->offset = 0;
859
860 if (ctx->Extensions.ARB_transform_feedback3) {
861 /* Parse gl_NextBuffer. */
862 if (strcmp(input, "gl_NextBuffer") == 0) {
863 this->next_buffer_separator = true;
864 return;
865 }
866
867 /* Parse gl_SkipComponents. */
868 if (strcmp(input, "gl_SkipComponents1") == 0)
869 this->skip_components = 1;
870 else if (strcmp(input, "gl_SkipComponents2") == 0)
871 this->skip_components = 2;
872 else if (strcmp(input, "gl_SkipComponents3") == 0)
873 this->skip_components = 3;
874 else if (strcmp(input, "gl_SkipComponents4") == 0)
875 this->skip_components = 4;
876
877 if (this->skip_components)
878 return;
879 }
880
881 /* Parse a declaration. */
882 const char *base_name_end;
883 long subscript = parse_program_resource_name(input, &base_name_end);
884 this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
885 if (this->var_name == NULL) {
886 _mesa_error_no_memory(__func__);
887 return;
888 }
889
890 if (subscript >= 0) {
891 this->array_subscript = subscript;
892 this->is_subscripted = true;
893 } else {
894 this->is_subscripted = false;
895 }
896
897 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
898 * class must behave specially to account for the fact that gl_ClipDistance
899 * is converted from a float[8] to a vec4[2].
900 */
901 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
902 strcmp(this->var_name, "gl_ClipDistance") == 0) {
903 this->lowered_builtin_array_variable = clip_distance;
904 }
905 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
906 strcmp(this->var_name, "gl_CullDistance") == 0) {
907 this->lowered_builtin_array_variable = cull_distance;
908 }
909
910 if (ctx->Const.LowerTessLevel &&
911 (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
912 this->lowered_builtin_array_variable = tess_level_outer;
913 if (ctx->Const.LowerTessLevel &&
914 (strcmp(this->var_name, "gl_TessLevelInner") == 0))
915 this->lowered_builtin_array_variable = tess_level_inner;
916 }
917
918
919 /**
920 * Determine whether two tfeedback_decl objects refer to the same variable and
921 * array index (if applicable).
922 */
923 bool
924 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
925 {
926 assert(x.is_varying() && y.is_varying());
927
928 if (strcmp(x.var_name, y.var_name) != 0)
929 return false;
930 if (x.is_subscripted != y.is_subscripted)
931 return false;
932 if (x.is_subscripted && x.array_subscript != y.array_subscript)
933 return false;
934 return true;
935 }
936
937
938 /**
939 * Assign a location and stream ID for this tfeedback_decl object based on the
940 * transform feedback candidate found by find_candidate.
941 *
942 * If an error occurs, the error is reported through linker_error() and false
943 * is returned.
944 */
945 bool
946 tfeedback_decl::assign_location(struct gl_context *ctx,
947 struct gl_shader_program *prog)
948 {
949 assert(this->is_varying());
950
951 unsigned fine_location
952 = this->matched_candidate->toplevel_var->data.location * 4
953 + this->matched_candidate->toplevel_var->data.location_frac
954 + this->matched_candidate->offset;
955 const unsigned dmul =
956 this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
957
958 if (this->matched_candidate->type->is_array()) {
959 /* Array variable */
960 const unsigned matrix_cols =
961 this->matched_candidate->type->fields.array->matrix_columns;
962 const unsigned vector_elements =
963 this->matched_candidate->type->fields.array->vector_elements;
964 unsigned actual_array_size;
965 switch (this->lowered_builtin_array_variable) {
966 case clip_distance:
967 actual_array_size = prog->last_vert_prog ?
968 prog->last_vert_prog->info.clip_distance_array_size : 0;
969 break;
970 case cull_distance:
971 actual_array_size = prog->last_vert_prog ?
972 prog->last_vert_prog->info.cull_distance_array_size : 0;
973 break;
974 case tess_level_outer:
975 actual_array_size = 4;
976 break;
977 case tess_level_inner:
978 actual_array_size = 2;
979 break;
980 case none:
981 default:
982 actual_array_size = this->matched_candidate->type->array_size();
983 break;
984 }
985
986 if (this->is_subscripted) {
987 /* Check array bounds. */
988 if (this->array_subscript >= actual_array_size) {
989 linker_error(prog, "Transform feedback varying %s has index "
990 "%i, but the array size is %u.",
991 this->orig_name, this->array_subscript,
992 actual_array_size);
993 return false;
994 }
995 unsigned array_elem_size = this->lowered_builtin_array_variable ?
996 1 : vector_elements * matrix_cols * dmul;
997 fine_location += array_elem_size * this->array_subscript;
998 this->size = 1;
999 } else {
1000 this->size = actual_array_size;
1001 }
1002 this->vector_elements = vector_elements;
1003 this->matrix_columns = matrix_cols;
1004 if (this->lowered_builtin_array_variable)
1005 this->type = GL_FLOAT;
1006 else
1007 this->type = this->matched_candidate->type->fields.array->gl_type;
1008 } else {
1009 /* Regular variable (scalar, vector, or matrix) */
1010 if (this->is_subscripted) {
1011 linker_error(prog, "Transform feedback varying %s requested, "
1012 "but %s is not an array.",
1013 this->orig_name, this->var_name);
1014 return false;
1015 }
1016 this->size = 1;
1017 this->vector_elements = this->matched_candidate->type->vector_elements;
1018 this->matrix_columns = this->matched_candidate->type->matrix_columns;
1019 this->type = this->matched_candidate->type->gl_type;
1020 }
1021 this->location = fine_location / 4;
1022 this->location_frac = fine_location % 4;
1023
1024 /* From GL_EXT_transform_feedback:
1025 * A program will fail to link if:
1026 *
1027 * * the total number of components to capture in any varying
1028 * variable in <varyings> is greater than the constant
1029 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1030 * buffer mode is SEPARATE_ATTRIBS_EXT;
1031 */
1032 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1033 this->num_components() >
1034 ctx->Const.MaxTransformFeedbackSeparateComponents) {
1035 linker_error(prog, "Transform feedback varying %s exceeds "
1036 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1037 this->orig_name);
1038 return false;
1039 }
1040
1041 /* Only transform feedback varyings can be assigned to non-zero streams,
1042 * so assign the stream id here.
1043 */
1044 this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1045
1046 unsigned array_offset = this->array_subscript * 4 * dmul;
1047 unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
1048 this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1049 this->offset = this->matched_candidate->toplevel_var->data.offset +
1050 array_offset + struct_offset;
1051
1052 return true;
1053 }
1054
1055
1056 unsigned
1057 tfeedback_decl::get_num_outputs() const
1058 {
1059 if (!this->is_varying()) {
1060 return 0;
1061 }
1062 return (this->num_components() + this->location_frac + 3)/4;
1063 }
1064
1065
1066 /**
1067 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1068 *
1069 * If an error occurs, the error is reported through linker_error() and false
1070 * is returned.
1071 */
1072 bool
1073 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1074 struct gl_transform_feedback_info *info,
1075 unsigned buffer, unsigned buffer_index,
1076 const unsigned max_outputs, bool *explicit_stride,
1077 bool has_xfb_qualifiers) const
1078 {
1079 unsigned xfb_offset = 0;
1080 unsigned size = this->size;
1081 /* Handle gl_SkipComponents. */
1082 if (this->skip_components) {
1083 info->Buffers[buffer].Stride += this->skip_components;
1084 size = this->skip_components;
1085 goto store_varying;
1086 }
1087
1088 if (this->next_buffer_separator) {
1089 size = 0;
1090 goto store_varying;
1091 }
1092
1093 if (has_xfb_qualifiers) {
1094 xfb_offset = this->offset / 4;
1095 } else {
1096 xfb_offset = info->Buffers[buffer].Stride;
1097 }
1098 info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1099
1100 {
1101 unsigned location = this->location;
1102 unsigned location_frac = this->location_frac;
1103 unsigned num_components = this->num_components();
1104 while (num_components > 0) {
1105 unsigned output_size = MIN2(num_components, 4 - location_frac);
1106 assert((info->NumOutputs == 0 && max_outputs == 0) ||
1107 info->NumOutputs < max_outputs);
1108
1109 /* From the ARB_enhanced_layouts spec:
1110 *
1111 * "If such a block member or variable is not written during a shader
1112 * invocation, the buffer contents at the assigned offset will be
1113 * undefined. Even if there are no static writes to a variable or
1114 * member that is assigned a transform feedback offset, the space is
1115 * still allocated in the buffer and still affects the stride."
1116 */
1117 if (this->is_varying_written()) {
1118 info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1119 info->Outputs[info->NumOutputs].OutputRegister = location;
1120 info->Outputs[info->NumOutputs].NumComponents = output_size;
1121 info->Outputs[info->NumOutputs].StreamId = stream_id;
1122 info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1123 info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1124 ++info->NumOutputs;
1125 }
1126 info->Buffers[buffer].Stream = this->stream_id;
1127 xfb_offset += output_size;
1128
1129 num_components -= output_size;
1130 location++;
1131 location_frac = 0;
1132 }
1133 }
1134
1135 if (explicit_stride && explicit_stride[buffer]) {
1136 if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1137 linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1138 "multiple of 8 as its applied to a type that is or "
1139 "contains a double.",
1140 info->Buffers[buffer].Stride * 4);
1141 return false;
1142 }
1143
1144 if ((this->offset / 4) / info->Buffers[buffer].Stride !=
1145 (xfb_offset - 1) / info->Buffers[buffer].Stride) {
1146 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1147 "buffer (%d)", xfb_offset * 4,
1148 info->Buffers[buffer].Stride * 4, buffer);
1149 return false;
1150 }
1151 } else {
1152 info->Buffers[buffer].Stride = xfb_offset;
1153 }
1154
1155 /* From GL_EXT_transform_feedback:
1156 * A program will fail to link if:
1157 *
1158 * * the total number of components to capture is greater than
1159 * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1160 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
1161 *
1162 * From GL_ARB_enhanced_layouts:
1163 *
1164 * "The resulting stride (implicit or explicit) must be less than or
1165 * equal to the implementation-dependent constant
1166 * gl_MaxTransformFeedbackInterleavedComponents."
1167 */
1168 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1169 has_xfb_qualifiers) &&
1170 info->Buffers[buffer].Stride >
1171 ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1172 linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1173 "limit has been exceeded.");
1174 return false;
1175 }
1176
1177 store_varying:
1178 info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1179 this->orig_name);
1180 info->Varyings[info->NumVarying].Type = this->type;
1181 info->Varyings[info->NumVarying].Size = size;
1182 info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1183 info->NumVarying++;
1184 info->Buffers[buffer].NumVaryings++;
1185
1186 return true;
1187 }
1188
1189
1190 const tfeedback_candidate *
1191 tfeedback_decl::find_candidate(gl_shader_program *prog,
1192 hash_table *tfeedback_candidates)
1193 {
1194 const char *name = this->var_name;
1195 switch (this->lowered_builtin_array_variable) {
1196 case none:
1197 name = this->var_name;
1198 break;
1199 case clip_distance:
1200 name = "gl_ClipDistanceMESA";
1201 break;
1202 case cull_distance:
1203 name = "gl_CullDistanceMESA";
1204 break;
1205 case tess_level_outer:
1206 name = "gl_TessLevelOuterMESA";
1207 break;
1208 case tess_level_inner:
1209 name = "gl_TessLevelInnerMESA";
1210 break;
1211 }
1212 hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1213
1214 this->matched_candidate = entry ?
1215 (const tfeedback_candidate *) entry->data : NULL;
1216
1217 if (!this->matched_candidate) {
1218 /* From GL_EXT_transform_feedback:
1219 * A program will fail to link if:
1220 *
1221 * * any variable name specified in the <varyings> array is not
1222 * declared as an output in the geometry shader (if present) or
1223 * the vertex shader (if no geometry shader is present);
1224 */
1225 linker_error(prog, "Transform feedback varying %s undeclared.",
1226 this->orig_name);
1227 }
1228
1229 return this->matched_candidate;
1230 }
1231
1232
1233 /**
1234 * Parse all the transform feedback declarations that were passed to
1235 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1236 *
1237 * If an error occurs, the error is reported through linker_error() and false
1238 * is returned.
1239 */
1240 static bool
1241 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1242 const void *mem_ctx, unsigned num_names,
1243 char **varying_names, tfeedback_decl *decls)
1244 {
1245 for (unsigned i = 0; i < num_names; ++i) {
1246 decls[i].init(ctx, mem_ctx, varying_names[i]);
1247
1248 if (!decls[i].is_varying())
1249 continue;
1250
1251 /* From GL_EXT_transform_feedback:
1252 * A program will fail to link if:
1253 *
1254 * * any two entries in the <varyings> array specify the same varying
1255 * variable;
1256 *
1257 * We interpret this to mean "any two entries in the <varyings> array
1258 * specify the same varying variable and array index", since transform
1259 * feedback of arrays would be useless otherwise.
1260 */
1261 for (unsigned j = 0; j < i; ++j) {
1262 if (!decls[j].is_varying())
1263 continue;
1264
1265 if (tfeedback_decl::is_same(decls[i], decls[j])) {
1266 linker_error(prog, "Transform feedback varying %s specified "
1267 "more than once.", varying_names[i]);
1268 return false;
1269 }
1270 }
1271 }
1272 return true;
1273 }
1274
1275
1276 static int
1277 cmp_xfb_offset(const void * x_generic, const void * y_generic)
1278 {
1279 tfeedback_decl *x = (tfeedback_decl *) x_generic;
1280 tfeedback_decl *y = (tfeedback_decl *) y_generic;
1281
1282 if (x->get_buffer() != y->get_buffer())
1283 return x->get_buffer() - y->get_buffer();
1284 return x->get_offset() - y->get_offset();
1285 }
1286
1287 /**
1288 * Store transform feedback location assignments into
1289 * prog->sh.LinkedTransformFeedback based on the data stored in
1290 * tfeedback_decls.
1291 *
1292 * If an error occurs, the error is reported through linker_error() and false
1293 * is returned.
1294 */
1295 static bool
1296 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1297 unsigned num_tfeedback_decls,
1298 tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
1299 {
1300 if (!prog->last_vert_prog)
1301 return true;
1302
1303 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1304 * tracking the number of buffers doesn't overflow.
1305 */
1306 assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1307
1308 bool separate_attribs_mode =
1309 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1310
1311 struct gl_program *xfb_prog = prog->last_vert_prog;
1312 xfb_prog->sh.LinkedTransformFeedback =
1313 rzalloc(xfb_prog, struct gl_transform_feedback_info);
1314
1315 /* The xfb_offset qualifier does not have to be used in increasing order
1316 * however some drivers expect to receive the list of transform feedback
1317 * declarations in order so sort it now for convenience.
1318 */
1319 if (has_xfb_qualifiers) {
1320 qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1321 cmp_xfb_offset);
1322 } else {
1323 xfb_prog->sh.LinkedTransformFeedback->api_enabled = true;
1324 }
1325
1326 xfb_prog->sh.LinkedTransformFeedback->Varyings =
1327 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1328 num_tfeedback_decls);
1329
1330 unsigned num_outputs = 0;
1331 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1332 if (tfeedback_decls[i].is_varying_written())
1333 num_outputs += tfeedback_decls[i].get_num_outputs();
1334 }
1335
1336 xfb_prog->sh.LinkedTransformFeedback->Outputs =
1337 rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1338 num_outputs);
1339
1340 unsigned num_buffers = 0;
1341 unsigned buffers = 0;
1342
1343 if (!has_xfb_qualifiers && separate_attribs_mode) {
1344 /* GL_SEPARATE_ATTRIBS */
1345 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1346 if (!tfeedback_decls[i].store(ctx, prog,
1347 xfb_prog->sh.LinkedTransformFeedback,
1348 num_buffers, num_buffers, num_outputs,
1349 NULL, has_xfb_qualifiers))
1350 return false;
1351
1352 buffers |= 1 << num_buffers;
1353 num_buffers++;
1354 }
1355 }
1356 else {
1357 /* GL_INVERLEAVED_ATTRIBS */
1358 int buffer_stream_id = -1;
1359 unsigned buffer =
1360 num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1361 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1362
1363 /* Apply any xfb_stride global qualifiers */
1364 if (has_xfb_qualifiers) {
1365 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1366 if (prog->TransformFeedback.BufferStride[j]) {
1367 explicit_stride[j] = true;
1368 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1369 prog->TransformFeedback.BufferStride[j] / 4;
1370 }
1371 }
1372 }
1373
1374 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1375 if (has_xfb_qualifiers &&
1376 buffer != tfeedback_decls[i].get_buffer()) {
1377 /* we have moved to the next buffer so reset stream id */
1378 buffer_stream_id = -1;
1379 num_buffers++;
1380 }
1381
1382 if (tfeedback_decls[i].is_next_buffer_separator()) {
1383 if (!tfeedback_decls[i].store(ctx, prog,
1384 xfb_prog->sh.LinkedTransformFeedback,
1385 buffer, num_buffers, num_outputs,
1386 explicit_stride, has_xfb_qualifiers))
1387 return false;
1388 num_buffers++;
1389 buffer_stream_id = -1;
1390 continue;
1391 }
1392
1393 if (has_xfb_qualifiers) {
1394 buffer = tfeedback_decls[i].get_buffer();
1395 } else {
1396 buffer = num_buffers;
1397 }
1398
1399 if (tfeedback_decls[i].is_varying()) {
1400 if (buffer_stream_id == -1) {
1401 /* First varying writing to this buffer: remember its stream */
1402 buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1403
1404 /* Only mark a buffer as active when there is a varying
1405 * attached to it. This behaviour is based on a revised version
1406 * of section 13.2.2 of the GL 4.6 spec.
1407 */
1408 buffers |= 1 << buffer;
1409 } else if (buffer_stream_id !=
1410 (int) tfeedback_decls[i].get_stream_id()) {
1411 /* Varying writes to the same buffer from a different stream */
1412 linker_error(prog,
1413 "Transform feedback can't capture varyings belonging "
1414 "to different vertex streams in a single buffer. "
1415 "Varying %s writes to buffer from stream %u, other "
1416 "varyings in the same buffer write from stream %u.",
1417 tfeedback_decls[i].name(),
1418 tfeedback_decls[i].get_stream_id(),
1419 buffer_stream_id);
1420 return false;
1421 }
1422 }
1423
1424 if (!tfeedback_decls[i].store(ctx, prog,
1425 xfb_prog->sh.LinkedTransformFeedback,
1426 buffer, num_buffers, num_outputs,
1427 explicit_stride, has_xfb_qualifiers))
1428 return false;
1429 }
1430 }
1431
1432 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1433
1434 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1435 return true;
1436 }
1437
1438 namespace {
1439
1440 /**
1441 * Data structure recording the relationship between outputs of one shader
1442 * stage (the "producer") and inputs of another (the "consumer").
1443 */
1444 class varying_matches
1445 {
1446 public:
1447 varying_matches(bool disable_varying_packing, bool xfb_enabled,
1448 bool enhanced_layouts_enabled,
1449 gl_shader_stage producer_stage,
1450 gl_shader_stage consumer_stage);
1451 ~varying_matches();
1452 void record(ir_variable *producer_var, ir_variable *consumer_var);
1453 unsigned assign_locations(struct gl_shader_program *prog,
1454 uint8_t *components,
1455 uint64_t reserved_slots);
1456 void store_locations() const;
1457
1458 private:
1459 bool is_varying_packing_safe(const glsl_type *type,
1460 const ir_variable *var);
1461
1462 /**
1463 * If true, this driver disables varying packing, so all varyings need to
1464 * be aligned on slot boundaries, and take up a number of slots equal to
1465 * their number of matrix columns times their array size.
1466 *
1467 * Packing may also be disabled because our current packing method is not
1468 * safe in SSO or versions of OpenGL where interpolation qualifiers are not
1469 * guaranteed to match across stages.
1470 */
1471 const bool disable_varying_packing;
1472
1473 /**
1474 * If true, this driver has transform feedback enabled. The transform
1475 * feedback code requires at least some packing be done even when varying
1476 * packing is disabled, fortunately where transform feedback requires
1477 * packing it's safe to override the disabled setting. See
1478 * is_varying_packing_safe().
1479 */
1480 const bool xfb_enabled;
1481
1482 const bool enhanced_layouts_enabled;
1483
1484 /**
1485 * Enum representing the order in which varyings are packed within a
1486 * packing class.
1487 *
1488 * Currently we pack vec4's first, then vec2's, then scalar values, then
1489 * vec3's. This order ensures that the only vectors that are at risk of
1490 * having to be "double parked" (split between two adjacent varying slots)
1491 * are the vec3's.
1492 */
1493 enum packing_order_enum {
1494 PACKING_ORDER_VEC4,
1495 PACKING_ORDER_VEC2,
1496 PACKING_ORDER_SCALAR,
1497 PACKING_ORDER_VEC3,
1498 };
1499
1500 static unsigned compute_packing_class(const ir_variable *var);
1501 static packing_order_enum compute_packing_order(const ir_variable *var);
1502 static int match_comparator(const void *x_generic, const void *y_generic);
1503 static int xfb_comparator(const void *x_generic, const void *y_generic);
1504
1505 /**
1506 * Structure recording the relationship between a single producer output
1507 * and a single consumer input.
1508 */
1509 struct match {
1510 /**
1511 * Packing class for this varying, computed by compute_packing_class().
1512 */
1513 unsigned packing_class;
1514
1515 /**
1516 * Packing order for this varying, computed by compute_packing_order().
1517 */
1518 packing_order_enum packing_order;
1519 unsigned num_components;
1520
1521 /**
1522 * The output variable in the producer stage.
1523 */
1524 ir_variable *producer_var;
1525
1526 /**
1527 * The input variable in the consumer stage.
1528 */
1529 ir_variable *consumer_var;
1530
1531 /**
1532 * The location which has been assigned for this varying. This is
1533 * expressed in multiples of a float, with the first generic varying
1534 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
1535 * value 0.
1536 */
1537 unsigned generic_location;
1538 } *matches;
1539
1540 /**
1541 * The number of elements in the \c matches array that are currently in
1542 * use.
1543 */
1544 unsigned num_matches;
1545
1546 /**
1547 * The number of elements that were set aside for the \c matches array when
1548 * it was allocated.
1549 */
1550 unsigned matches_capacity;
1551
1552 gl_shader_stage producer_stage;
1553 gl_shader_stage consumer_stage;
1554 };
1555
1556 } /* anonymous namespace */
1557
1558 varying_matches::varying_matches(bool disable_varying_packing,
1559 bool xfb_enabled,
1560 bool enhanced_layouts_enabled,
1561 gl_shader_stage producer_stage,
1562 gl_shader_stage consumer_stage)
1563 : disable_varying_packing(disable_varying_packing),
1564 xfb_enabled(xfb_enabled),
1565 enhanced_layouts_enabled(enhanced_layouts_enabled),
1566 producer_stage(producer_stage),
1567 consumer_stage(consumer_stage)
1568 {
1569 /* Note: this initial capacity is rather arbitrarily chosen to be large
1570 * enough for many cases without wasting an unreasonable amount of space.
1571 * varying_matches::record() will resize the array if there are more than
1572 * this number of varyings.
1573 */
1574 this->matches_capacity = 8;
1575 this->matches = (match *)
1576 malloc(sizeof(*this->matches) * this->matches_capacity);
1577 this->num_matches = 0;
1578 }
1579
1580
1581 varying_matches::~varying_matches()
1582 {
1583 free(this->matches);
1584 }
1585
1586
1587 /**
1588 * Packing is always safe on individual arrays, structures, and matrices. It
1589 * is also safe if the varying is only used for transform feedback.
1590 */
1591 bool
1592 varying_matches::is_varying_packing_safe(const glsl_type *type,
1593 const ir_variable *var)
1594 {
1595 if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1596 consumer_stage == MESA_SHADER_TESS_CTRL ||
1597 producer_stage == MESA_SHADER_TESS_CTRL)
1598 return false;
1599
1600 return xfb_enabled && (type->is_array() || type->is_record() ||
1601 type->is_matrix() || var->data.is_xfb_only);
1602 }
1603
1604
1605 /**
1606 * Record the given producer/consumer variable pair in the list of variables
1607 * that should later be assigned locations.
1608 *
1609 * It is permissible for \c consumer_var to be NULL (this happens if a
1610 * variable is output by the producer and consumed by transform feedback, but
1611 * not consumed by the consumer).
1612 *
1613 * If \c producer_var has already been paired up with a consumer_var, or
1614 * producer_var is part of fixed pipeline functionality (and hence already has
1615 * a location assigned), this function has no effect.
1616 *
1617 * Note: as a side effect this function may change the interpolation type of
1618 * \c producer_var, but only when the change couldn't possibly affect
1619 * rendering.
1620 */
1621 void
1622 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1623 {
1624 assert(producer_var != NULL || consumer_var != NULL);
1625
1626 if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1627 producer_var->data.explicit_location)) ||
1628 (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1629 consumer_var->data.explicit_location))) {
1630 /* Either a location already exists for this variable (since it is part
1631 * of fixed functionality), or it has already been recorded as part of a
1632 * previous match.
1633 */
1634 return;
1635 }
1636
1637 bool needs_flat_qualifier = consumer_var == NULL &&
1638 (producer_var->type->contains_integer() ||
1639 producer_var->type->contains_double());
1640
1641 if (!disable_varying_packing &&
1642 (needs_flat_qualifier ||
1643 (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1644 /* Since this varying is not being consumed by the fragment shader, its
1645 * interpolation type varying cannot possibly affect rendering.
1646 * Also, this variable is non-flat and is (or contains) an integer
1647 * or a double.
1648 * If the consumer stage is unknown, don't modify the interpolation
1649 * type as it could affect rendering later with separate shaders.
1650 *
1651 * lower_packed_varyings requires all integer varyings to flat,
1652 * regardless of where they appear. We can trivially satisfy that
1653 * requirement by changing the interpolation type to flat here.
1654 */
1655 if (producer_var) {
1656 producer_var->data.centroid = false;
1657 producer_var->data.sample = false;
1658 producer_var->data.interpolation = INTERP_MODE_FLAT;
1659 }
1660
1661 if (consumer_var) {
1662 consumer_var->data.centroid = false;
1663 consumer_var->data.sample = false;
1664 consumer_var->data.interpolation = INTERP_MODE_FLAT;
1665 }
1666 }
1667
1668 if (this->num_matches == this->matches_capacity) {
1669 this->matches_capacity *= 2;
1670 this->matches = (match *)
1671 realloc(this->matches,
1672 sizeof(*this->matches) * this->matches_capacity);
1673 }
1674
1675 /* We must use the consumer to compute the packing class because in GL4.4+
1676 * there is no guarantee interpolation qualifiers will match across stages.
1677 *
1678 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1679 *
1680 * "The type and presence of interpolation qualifiers of variables with
1681 * the same name declared in all linked shaders for the same cross-stage
1682 * interface must match, otherwise the link command will fail.
1683 *
1684 * When comparing an output from one stage to an input of a subsequent
1685 * stage, the input and output don't match if their interpolation
1686 * qualifiers (or lack thereof) are not the same."
1687 *
1688 * This text was also in at least revison 7 of the 4.40 spec but is no
1689 * longer in revision 9 and not in the 4.50 spec.
1690 */
1691 const ir_variable *const var = (consumer_var != NULL)
1692 ? consumer_var : producer_var;
1693 const gl_shader_stage stage = (consumer_var != NULL)
1694 ? consumer_stage : producer_stage;
1695 const glsl_type *type = get_varying_type(var, stage);
1696
1697 if (producer_var && consumer_var &&
1698 consumer_var->data.must_be_shader_input) {
1699 producer_var->data.must_be_shader_input = 1;
1700 }
1701
1702 this->matches[this->num_matches].packing_class
1703 = this->compute_packing_class(var);
1704 this->matches[this->num_matches].packing_order
1705 = this->compute_packing_order(var);
1706 if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
1707 var->data.must_be_shader_input) {
1708 unsigned slots = type->count_attribute_slots(false);
1709 this->matches[this->num_matches].num_components = slots * 4;
1710 } else {
1711 this->matches[this->num_matches].num_components
1712 = type->component_slots();
1713 }
1714
1715 this->matches[this->num_matches].producer_var = producer_var;
1716 this->matches[this->num_matches].consumer_var = consumer_var;
1717 this->num_matches++;
1718 if (producer_var)
1719 producer_var->data.is_unmatched_generic_inout = 0;
1720 if (consumer_var)
1721 consumer_var->data.is_unmatched_generic_inout = 0;
1722 }
1723
1724
1725 /**
1726 * Choose locations for all of the variable matches that were previously
1727 * passed to varying_matches::record().
1728 */
1729 unsigned
1730 varying_matches::assign_locations(struct gl_shader_program *prog,
1731 uint8_t *components,
1732 uint64_t reserved_slots)
1733 {
1734 /* If packing has been disabled then we cannot safely sort the varyings by
1735 * class as it may mean we are using a version of OpenGL where
1736 * interpolation qualifiers are not guaranteed to be matching across
1737 * shaders, sorting in this case could result in mismatching shader
1738 * interfaces.
1739 * When packing is disabled the sort orders varyings used by transform
1740 * feedback first, but also depends on *undefined behaviour* of qsort to
1741 * reverse the order of the varyings. See: xfb_comparator().
1742 */
1743 if (!this->disable_varying_packing) {
1744 /* Sort varying matches into an order that makes them easy to pack. */
1745 qsort(this->matches, this->num_matches, sizeof(*this->matches),
1746 &varying_matches::match_comparator);
1747 } else {
1748 /* Only sort varyings that are only used by transform feedback. */
1749 qsort(this->matches, this->num_matches, sizeof(*this->matches),
1750 &varying_matches::xfb_comparator);
1751 }
1752
1753 unsigned generic_location = 0;
1754 unsigned generic_patch_location = MAX_VARYING*4;
1755 bool previous_var_xfb_only = false;
1756
1757 for (unsigned i = 0; i < this->num_matches; i++) {
1758 unsigned *location = &generic_location;
1759
1760 const ir_variable *var;
1761 const glsl_type *type;
1762 bool is_vertex_input = false;
1763 if (matches[i].consumer_var) {
1764 var = matches[i].consumer_var;
1765 type = get_varying_type(var, consumer_stage);
1766 if (consumer_stage == MESA_SHADER_VERTEX)
1767 is_vertex_input = true;
1768 } else {
1769 var = matches[i].producer_var;
1770 type = get_varying_type(var, producer_stage);
1771 }
1772
1773 if (var->data.patch)
1774 location = &generic_patch_location;
1775
1776 /* Advance to the next slot if this varying has a different packing
1777 * class than the previous one, and we're not already on a slot
1778 * boundary.
1779 *
1780 * Also advance to the next slot if packing is disabled. This makes sure
1781 * we don't assign varyings the same locations which is possible
1782 * because we still pack individual arrays, records and matrices even
1783 * when packing is disabled. Note we don't advance to the next slot if
1784 * we can pack varyings together that are only used for transform
1785 * feedback.
1786 */
1787 if (var->data.must_be_shader_input ||
1788 (this->disable_varying_packing &&
1789 !(previous_var_xfb_only && var->data.is_xfb_only)) ||
1790 (i > 0 && this->matches[i - 1].packing_class
1791 != this->matches[i].packing_class )) {
1792 *location = ALIGN(*location, 4);
1793 }
1794
1795 previous_var_xfb_only = var->data.is_xfb_only;
1796
1797 /* The number of components taken up by this variable. For vertex shader
1798 * inputs, we use the number of slots * 4, as they have different
1799 * counting rules.
1800 */
1801 unsigned num_components = is_vertex_input ?
1802 type->count_attribute_slots(is_vertex_input) * 4 :
1803 this->matches[i].num_components;
1804
1805 /* The last slot for this variable, inclusive. */
1806 unsigned slot_end = *location + num_components - 1;
1807
1808 /* FIXME: We could be smarter in the below code and loop back over
1809 * trying to fill any locations that we skipped because we couldn't pack
1810 * the varying between an explicit location. For now just let the user
1811 * hit the linking error if we run out of room and suggest they use
1812 * explicit locations.
1813 */
1814 while (slot_end < MAX_VARYING * 4u) {
1815 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
1816 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
1817
1818 assert(slots > 0);
1819 if (reserved_slots & slot_mask) {
1820 *location = ALIGN(*location + 1, 4);
1821 slot_end = *location + num_components - 1;
1822 continue;
1823 }
1824
1825 break;
1826 }
1827
1828 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
1829 linker_error(prog, "insufficient contiguous locations available for "
1830 "%s it is possible an array or struct could not be "
1831 "packed between varyings with explicit locations. Try "
1832 "using an explicit location for arrays and structs.",
1833 var->name);
1834 }
1835
1836 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
1837 for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
1838 components[j] = 4;
1839 components[slot_end / 4u] = (slot_end & 3) + 1;
1840 }
1841
1842 this->matches[i].generic_location = *location;
1843
1844 *location = slot_end + 1;
1845 }
1846
1847 return (generic_location + 3) / 4;
1848 }
1849
1850
1851 /**
1852 * Update the producer and consumer shaders to reflect the locations
1853 * assignments that were made by varying_matches::assign_locations().
1854 */
1855 void
1856 varying_matches::store_locations() const
1857 {
1858 /* Check if location needs to be packed with lower_packed_varyings() or if
1859 * we can just use ARB_enhanced_layouts packing.
1860 */
1861 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
1862 const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
1863
1864 for (unsigned i = 0; i < this->num_matches; i++) {
1865 ir_variable *producer_var = this->matches[i].producer_var;
1866 ir_variable *consumer_var = this->matches[i].consumer_var;
1867 unsigned generic_location = this->matches[i].generic_location;
1868 unsigned slot = generic_location / 4;
1869 unsigned offset = generic_location % 4;
1870
1871 if (producer_var) {
1872 producer_var->data.location = VARYING_SLOT_VAR0 + slot;
1873 producer_var->data.location_frac = offset;
1874 }
1875
1876 if (consumer_var) {
1877 assert(consumer_var->data.location == -1);
1878 consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
1879 consumer_var->data.location_frac = offset;
1880 }
1881
1882 /* Find locations suitable for native packing via
1883 * ARB_enhanced_layouts.
1884 */
1885 if (producer_var && consumer_var) {
1886 if (enhanced_layouts_enabled) {
1887 const glsl_type *type =
1888 get_varying_type(producer_var, producer_stage);
1889 if (type->is_array() || type->is_matrix() || type->is_record() ||
1890 type->is_double()) {
1891 unsigned comp_slots = type->component_slots() + offset;
1892 unsigned slots = comp_slots / 4;
1893 if (comp_slots % 4)
1894 slots += 1;
1895
1896 for (unsigned j = 0; j < slots; j++) {
1897 pack_loc[slot + j] = true;
1898 }
1899 } else if (offset + type->vector_elements > 4) {
1900 pack_loc[slot] = true;
1901 pack_loc[slot + 1] = true;
1902 } else {
1903 loc_type[slot][offset] = type;
1904 }
1905 }
1906 }
1907 }
1908
1909 /* Attempt to use ARB_enhanced_layouts for more efficient packing if
1910 * suitable.
1911 */
1912 if (enhanced_layouts_enabled) {
1913 for (unsigned i = 0; i < this->num_matches; i++) {
1914 ir_variable *producer_var = this->matches[i].producer_var;
1915 ir_variable *consumer_var = this->matches[i].consumer_var;
1916 unsigned generic_location = this->matches[i].generic_location;
1917 unsigned slot = generic_location / 4;
1918
1919 if (pack_loc[slot] || !producer_var || !consumer_var)
1920 continue;
1921
1922 const glsl_type *type =
1923 get_varying_type(producer_var, producer_stage);
1924 bool type_match = true;
1925 for (unsigned j = 0; j < 4; j++) {
1926 if (loc_type[slot][j]) {
1927 if (type->base_type != loc_type[slot][j]->base_type)
1928 type_match = false;
1929 }
1930 }
1931
1932 if (type_match) {
1933 producer_var->data.explicit_location = 1;
1934 consumer_var->data.explicit_location = 1;
1935 producer_var->data.explicit_component = 1;
1936 consumer_var->data.explicit_component = 1;
1937 }
1938 }
1939 }
1940 }
1941
1942
1943 /**
1944 * Compute the "packing class" of the given varying. This is an unsigned
1945 * integer with the property that two variables in the same packing class can
1946 * be safely backed into the same vec4.
1947 */
1948 unsigned
1949 varying_matches::compute_packing_class(const ir_variable *var)
1950 {
1951 /* Without help from the back-end, there is no way to pack together
1952 * variables with different interpolation types, because
1953 * lower_packed_varyings must choose exactly one interpolation type for
1954 * each packed varying it creates.
1955 *
1956 * However, we can safely pack together floats, ints, and uints, because:
1957 *
1958 * - varyings of base type "int" and "uint" must use the "flat"
1959 * interpolation type, which can only occur in GLSL 1.30 and above.
1960 *
1961 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
1962 * can store flat floats as ints without losing any information (using
1963 * the ir_unop_bitcast_* opcodes).
1964 *
1965 * Therefore, the packing class depends only on the interpolation type.
1966 */
1967 unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
1968 (var->data.patch << 2) |
1969 (var->data.must_be_shader_input << 3);
1970 packing_class *= 8;
1971 packing_class += var->is_interpolation_flat()
1972 ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
1973 return packing_class;
1974 }
1975
1976
1977 /**
1978 * Compute the "packing order" of the given varying. This is a sort key we
1979 * use to determine when to attempt to pack the given varying relative to
1980 * other varyings in the same packing class.
1981 */
1982 varying_matches::packing_order_enum
1983 varying_matches::compute_packing_order(const ir_variable *var)
1984 {
1985 const glsl_type *element_type = var->type;
1986
1987 while (element_type->is_array()) {
1988 element_type = element_type->fields.array;
1989 }
1990
1991 switch (element_type->component_slots() % 4) {
1992 case 1: return PACKING_ORDER_SCALAR;
1993 case 2: return PACKING_ORDER_VEC2;
1994 case 3: return PACKING_ORDER_VEC3;
1995 case 0: return PACKING_ORDER_VEC4;
1996 default:
1997 assert(!"Unexpected value of vector_elements");
1998 return PACKING_ORDER_VEC4;
1999 }
2000 }
2001
2002
2003 /**
2004 * Comparison function passed to qsort() to sort varyings by packing_class and
2005 * then by packing_order.
2006 */
2007 int
2008 varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2009 {
2010 const match *x = (const match *) x_generic;
2011 const match *y = (const match *) y_generic;
2012
2013 if (x->packing_class != y->packing_class)
2014 return x->packing_class - y->packing_class;
2015 return x->packing_order - y->packing_order;
2016 }
2017
2018
2019 /**
2020 * Comparison function passed to qsort() to sort varyings used only by
2021 * transform feedback when packing of other varyings is disabled.
2022 */
2023 int
2024 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2025 {
2026 const match *x = (const match *) x_generic;
2027
2028 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2029 return match_comparator(x_generic, y_generic);
2030
2031 /* FIXME: When the comparator returns 0 it means the elements being
2032 * compared are equivalent. However the qsort documentation says:
2033 *
2034 * "The order of equivalent elements is undefined."
2035 *
2036 * In practice the sort ends up reversing the order of the varyings which
2037 * means locations are also assigned in this reversed order and happens to
2038 * be what we want. This is also whats happening in
2039 * varying_matches::match_comparator().
2040 */
2041 return 0;
2042 }
2043
2044
2045 /**
2046 * Is the given variable a varying variable to be counted against the
2047 * limit in ctx->Const.MaxVarying?
2048 * This includes variables such as texcoords, colors and generic
2049 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2050 */
2051 static bool
2052 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2053 {
2054 /* Only fragment shaders will take a varying variable as an input */
2055 if (stage == MESA_SHADER_FRAGMENT &&
2056 var->data.mode == ir_var_shader_in) {
2057 switch (var->data.location) {
2058 case VARYING_SLOT_POS:
2059 case VARYING_SLOT_FACE:
2060 case VARYING_SLOT_PNTC:
2061 return false;
2062 default:
2063 return true;
2064 }
2065 }
2066 return false;
2067 }
2068
2069
2070 /**
2071 * Visitor class that generates tfeedback_candidate structs describing all
2072 * possible targets of transform feedback.
2073 *
2074 * tfeedback_candidate structs are stored in the hash table
2075 * tfeedback_candidates, which is passed to the constructor. This hash table
2076 * maps varying names to instances of the tfeedback_candidate struct.
2077 */
2078 class tfeedback_candidate_generator : public program_resource_visitor
2079 {
2080 public:
2081 tfeedback_candidate_generator(void *mem_ctx,
2082 hash_table *tfeedback_candidates)
2083 : mem_ctx(mem_ctx),
2084 tfeedback_candidates(tfeedback_candidates),
2085 toplevel_var(NULL),
2086 varying_floats(0)
2087 {
2088 }
2089
2090 void process(ir_variable *var)
2091 {
2092 /* All named varying interface blocks should be flattened by now */
2093 assert(!var->is_interface_instance());
2094
2095 this->toplevel_var = var;
2096 this->varying_floats = 0;
2097 program_resource_visitor::process(var, false);
2098 }
2099
2100 private:
2101 virtual void visit_field(const glsl_type *type, const char *name,
2102 bool /* row_major */,
2103 const glsl_type * /* record_type */,
2104 const enum glsl_interface_packing,
2105 bool /* last_field */)
2106 {
2107 assert(!type->without_array()->is_record());
2108 assert(!type->without_array()->is_interface());
2109
2110 tfeedback_candidate *candidate
2111 = rzalloc(this->mem_ctx, tfeedback_candidate);
2112 candidate->toplevel_var = this->toplevel_var;
2113 candidate->type = type;
2114 candidate->offset = this->varying_floats;
2115 _mesa_hash_table_insert(this->tfeedback_candidates,
2116 ralloc_strdup(this->mem_ctx, name),
2117 candidate);
2118 this->varying_floats += type->component_slots();
2119 }
2120
2121 /**
2122 * Memory context used to allocate hash table keys and values.
2123 */
2124 void * const mem_ctx;
2125
2126 /**
2127 * Hash table in which tfeedback_candidate objects should be stored.
2128 */
2129 hash_table * const tfeedback_candidates;
2130
2131 /**
2132 * Pointer to the toplevel variable that is being traversed.
2133 */
2134 ir_variable *toplevel_var;
2135
2136 /**
2137 * Total number of varying floats that have been visited so far. This is
2138 * used to determine the offset to each varying within the toplevel
2139 * variable.
2140 */
2141 unsigned varying_floats;
2142 };
2143
2144
2145 namespace linker {
2146
2147 void
2148 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2149 hash_table *consumer_inputs,
2150 hash_table *consumer_interface_inputs,
2151 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2152 {
2153 memset(consumer_inputs_with_locations,
2154 0,
2155 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2156
2157 foreach_in_list(ir_instruction, node, ir) {
2158 ir_variable *const input_var = node->as_variable();
2159
2160 if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2161 /* All interface blocks should have been lowered by this point */
2162 assert(!input_var->type->is_interface());
2163
2164 if (input_var->data.explicit_location) {
2165 /* assign_varying_locations only cares about finding the
2166 * ir_variable at the start of a contiguous location block.
2167 *
2168 * - For !producer, consumer_inputs_with_locations isn't used.
2169 *
2170 * - For !consumer, consumer_inputs_with_locations is empty.
2171 *
2172 * For consumer && producer, if you were trying to set some
2173 * ir_variable to the middle of a location block on the other side
2174 * of producer/consumer, cross_validate_outputs_to_inputs() should
2175 * be link-erroring due to either type mismatch or location
2176 * overlaps. If the variables do match up, then they've got a
2177 * matching data.location and you only looked at
2178 * consumer_inputs_with_locations[var->data.location], not any
2179 * following entries for the array/structure.
2180 */
2181 consumer_inputs_with_locations[input_var->data.location] =
2182 input_var;
2183 } else if (input_var->get_interface_type() != NULL) {
2184 char *const iface_field_name =
2185 ralloc_asprintf(mem_ctx, "%s.%s",
2186 input_var->get_interface_type()->without_array()->name,
2187 input_var->name);
2188 _mesa_hash_table_insert(consumer_interface_inputs,
2189 iface_field_name, input_var);
2190 } else {
2191 _mesa_hash_table_insert(consumer_inputs,
2192 ralloc_strdup(mem_ctx, input_var->name),
2193 input_var);
2194 }
2195 }
2196 }
2197 }
2198
2199 /**
2200 * Find a variable from the consumer that "matches" the specified variable
2201 *
2202 * This function only finds inputs with names that match. There is no
2203 * validation (here) that the types, etc. are compatible.
2204 */
2205 ir_variable *
2206 get_matching_input(void *mem_ctx,
2207 const ir_variable *output_var,
2208 hash_table *consumer_inputs,
2209 hash_table *consumer_interface_inputs,
2210 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2211 {
2212 ir_variable *input_var;
2213
2214 if (output_var->data.explicit_location) {
2215 input_var = consumer_inputs_with_locations[output_var->data.location];
2216 } else if (output_var->get_interface_type() != NULL) {
2217 char *const iface_field_name =
2218 ralloc_asprintf(mem_ctx, "%s.%s",
2219 output_var->get_interface_type()->without_array()->name,
2220 output_var->name);
2221 hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2222 input_var = entry ? (ir_variable *) entry->data : NULL;
2223 } else {
2224 hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2225 input_var = entry ? (ir_variable *) entry->data : NULL;
2226 }
2227
2228 return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2229 ? NULL : input_var;
2230 }
2231
2232 }
2233
2234 static int
2235 io_variable_cmp(const void *_a, const void *_b)
2236 {
2237 const ir_variable *const a = *(const ir_variable **) _a;
2238 const ir_variable *const b = *(const ir_variable **) _b;
2239
2240 if (a->data.explicit_location && b->data.explicit_location)
2241 return b->data.location - a->data.location;
2242
2243 if (a->data.explicit_location && !b->data.explicit_location)
2244 return 1;
2245
2246 if (!a->data.explicit_location && b->data.explicit_location)
2247 return -1;
2248
2249 return -strcmp(a->name, b->name);
2250 }
2251
2252 /**
2253 * Sort the shader IO variables into canonical order
2254 */
2255 static void
2256 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2257 {
2258 ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2259 unsigned num_variables = 0;
2260
2261 foreach_in_list(ir_instruction, node, ir) {
2262 ir_variable *const var = node->as_variable();
2263
2264 if (var == NULL || var->data.mode != io_mode)
2265 continue;
2266
2267 /* If we have already encountered more I/O variables that could
2268 * successfully link, bail.
2269 */
2270 if (num_variables == ARRAY_SIZE(var_table))
2271 return;
2272
2273 var_table[num_variables++] = var;
2274 }
2275
2276 if (num_variables == 0)
2277 return;
2278
2279 /* Sort the list in reverse order (io_variable_cmp handles this). Later
2280 * we're going to push the variables on to the IR list as a stack, so we
2281 * want the last variable (in canonical order) to be first in the list.
2282 */
2283 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2284
2285 /* Remove the variable from it's current location in the IR, and put it at
2286 * the front.
2287 */
2288 for (unsigned i = 0; i < num_variables; i++) {
2289 var_table[i]->remove();
2290 ir->push_head(var_table[i]);
2291 }
2292 }
2293
2294 /**
2295 * Generate a bitfield map of the explicit locations for shader varyings.
2296 *
2297 * Note: For Tessellation shaders we are sitting right on the limits of the
2298 * 64 bit map. Per-vertex and per-patch both have separate location domains
2299 * with a max of MAX_VARYING.
2300 */
2301 static uint64_t
2302 reserved_varying_slot(struct gl_linked_shader *stage,
2303 ir_variable_mode io_mode)
2304 {
2305 assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2306 /* Avoid an overflow of the returned value */
2307 assert(MAX_VARYINGS_INCL_PATCH <= 64);
2308
2309 uint64_t slots = 0;
2310 int var_slot;
2311
2312 if (!stage)
2313 return slots;
2314
2315 foreach_in_list(ir_instruction, node, stage->ir) {
2316 ir_variable *const var = node->as_variable();
2317
2318 if (var == NULL || var->data.mode != io_mode ||
2319 !var->data.explicit_location ||
2320 var->data.location < VARYING_SLOT_VAR0)
2321 continue;
2322
2323 var_slot = var->data.location - VARYING_SLOT_VAR0;
2324
2325 unsigned num_elements = get_varying_type(var, stage->Stage)
2326 ->count_attribute_slots(io_mode == ir_var_shader_in &&
2327 stage->Stage == MESA_SHADER_VERTEX);
2328 for (unsigned i = 0; i < num_elements; i++) {
2329 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2330 slots |= UINT64_C(1) << var_slot;
2331 var_slot += 1;
2332 }
2333 }
2334
2335 return slots;
2336 }
2337
2338
2339 /**
2340 * Assign locations for all variables that are produced in one pipeline stage
2341 * (the "producer") and consumed in the next stage (the "consumer").
2342 *
2343 * Variables produced by the producer may also be consumed by transform
2344 * feedback.
2345 *
2346 * \param num_tfeedback_decls is the number of declarations indicating
2347 * variables that may be consumed by transform feedback.
2348 *
2349 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2350 * representing the result of parsing the strings passed to
2351 * glTransformFeedbackVaryings(). assign_location() will be called for
2352 * each of these objects that matches one of the outputs of the
2353 * producer.
2354 *
2355 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2356 * be NULL. In this case, varying locations are assigned solely based on the
2357 * requirements of transform feedback.
2358 */
2359 static bool
2360 assign_varying_locations(struct gl_context *ctx,
2361 void *mem_ctx,
2362 struct gl_shader_program *prog,
2363 gl_linked_shader *producer,
2364 gl_linked_shader *consumer,
2365 unsigned num_tfeedback_decls,
2366 tfeedback_decl *tfeedback_decls,
2367 const uint64_t reserved_slots)
2368 {
2369 /* Tessellation shaders treat inputs and outputs as shared memory and can
2370 * access inputs and outputs of other invocations.
2371 * Therefore, they can't be lowered to temps easily (and definitely not
2372 * efficiently).
2373 */
2374 bool unpackable_tess =
2375 (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2376 (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2377 (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2378
2379 /* Transform feedback code assumes varying arrays are packed, so if the
2380 * driver has disabled varying packing, make sure to at least enable
2381 * packing required by transform feedback.
2382 */
2383 bool xfb_enabled =
2384 ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2385
2386 /* Disable packing on outward facing interfaces for SSO because in ES we
2387 * need to retain the unpacked varying information for draw time
2388 * validation.
2389 *
2390 * Packing is still enabled on individual arrays, structs, and matrices as
2391 * these are required by the transform feedback code and it is still safe
2392 * to do so. We also enable packing when a varying is only used for
2393 * transform feedback and its not a SSO.
2394 */
2395 bool disable_varying_packing =
2396 ctx->Const.DisableVaryingPacking || unpackable_tess;
2397 if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2398 disable_varying_packing = true;
2399
2400 varying_matches matches(disable_varying_packing, xfb_enabled,
2401 ctx->Extensions.ARB_enhanced_layouts,
2402 producer ? producer->Stage : MESA_SHADER_NONE,
2403 consumer ? consumer->Stage : MESA_SHADER_NONE);
2404 hash_table *tfeedback_candidates =
2405 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2406 _mesa_key_string_equal);
2407 hash_table *consumer_inputs =
2408 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2409 _mesa_key_string_equal);
2410 hash_table *consumer_interface_inputs =
2411 _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2412 _mesa_key_string_equal);
2413 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2414 NULL,
2415 };
2416
2417 unsigned consumer_vertices = 0;
2418 if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2419 consumer_vertices = prog->Geom.VerticesIn;
2420
2421 /* Operate in a total of four passes.
2422 *
2423 * 1. Sort inputs / outputs into a canonical order. This is necessary so
2424 * that inputs / outputs of separable shaders will be assigned
2425 * predictable locations regardless of the order in which declarations
2426 * appeared in the shader source.
2427 *
2428 * 2. Assign locations for any matching inputs and outputs.
2429 *
2430 * 3. Mark output variables in the producer that do not have locations as
2431 * not being outputs. This lets the optimizer eliminate them.
2432 *
2433 * 4. Mark input variables in the consumer that do not have locations as
2434 * not being inputs. This lets the optimizer eliminate them.
2435 */
2436 if (consumer)
2437 canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2438
2439 if (producer)
2440 canonicalize_shader_io(producer->ir, ir_var_shader_out);
2441
2442 if (consumer)
2443 linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2444 consumer_inputs,
2445 consumer_interface_inputs,
2446 consumer_inputs_with_locations);
2447
2448 if (producer) {
2449 foreach_in_list(ir_instruction, node, producer->ir) {
2450 ir_variable *const output_var = node->as_variable();
2451
2452 if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2453 continue;
2454
2455 /* Only geometry shaders can use non-zero streams */
2456 assert(output_var->data.stream == 0 ||
2457 (output_var->data.stream < MAX_VERTEX_STREAMS &&
2458 producer->Stage == MESA_SHADER_GEOMETRY));
2459
2460 if (num_tfeedback_decls > 0) {
2461 tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
2462 g.process(output_var);
2463 }
2464
2465 ir_variable *const input_var =
2466 linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2467 consumer_interface_inputs,
2468 consumer_inputs_with_locations);
2469
2470 /* If a matching input variable was found, add this output (and the
2471 * input) to the set. If this is a separable program and there is no
2472 * consumer stage, add the output.
2473 *
2474 * Always add TCS outputs. They are shared by all invocations
2475 * within a patch and can be used as shared memory.
2476 */
2477 if (input_var || (prog->SeparateShader && consumer == NULL) ||
2478 producer->Stage == MESA_SHADER_TESS_CTRL) {
2479 matches.record(output_var, input_var);
2480 }
2481
2482 /* Only stream 0 outputs can be consumed in the next stage */
2483 if (input_var && output_var->data.stream != 0) {
2484 linker_error(prog, "output %s is assigned to stream=%d but "
2485 "is linked to an input, which requires stream=0",
2486 output_var->name, output_var->data.stream);
2487 return false;
2488 }
2489 }
2490 } else {
2491 /* If there's no producer stage, then this must be a separable program.
2492 * For example, we may have a program that has just a fragment shader.
2493 * Later this program will be used with some arbitrary vertex (or
2494 * geometry) shader program. This means that locations must be assigned
2495 * for all the inputs.
2496 */
2497 foreach_in_list(ir_instruction, node, consumer->ir) {
2498 ir_variable *const input_var = node->as_variable();
2499
2500 if (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2501 continue;
2502
2503 matches.record(NULL, input_var);
2504 }
2505 }
2506
2507 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2508 if (!tfeedback_decls[i].is_varying())
2509 continue;
2510
2511 const tfeedback_candidate *matched_candidate
2512 = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2513
2514 if (matched_candidate == NULL) {
2515 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2516 return false;
2517 }
2518
2519 /* Mark xfb varyings as always active */
2520 matched_candidate->toplevel_var->data.always_active_io = 1;
2521
2522 /* Mark any corresponding inputs as always active also. We must do this
2523 * because we have a NIR pass that lowers vectors to scalars and another
2524 * that removes unused varyings.
2525 * We don't split varyings marked as always active because there is no
2526 * point in doing so. This means we need to mark both sides of the
2527 * interface as always active otherwise we will have a mismatch and
2528 * start removing things we shouldn't.
2529 */
2530 ir_variable *const input_var =
2531 linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2532 consumer_inputs,
2533 consumer_interface_inputs,
2534 consumer_inputs_with_locations);
2535 if (input_var)
2536 input_var->data.always_active_io = 1;
2537
2538 if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
2539 matched_candidate->toplevel_var->data.is_xfb_only = 1;
2540 matches.record(matched_candidate->toplevel_var, NULL);
2541 }
2542 }
2543
2544 _mesa_hash_table_destroy(consumer_inputs, NULL);
2545 _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
2546
2547 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
2548 const unsigned slots_used = matches.assign_locations(
2549 prog, components, reserved_slots);
2550 matches.store_locations();
2551
2552 for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2553 if (!tfeedback_decls[i].is_varying())
2554 continue;
2555
2556 if (!tfeedback_decls[i].assign_location(ctx, prog)) {
2557 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2558 return false;
2559 }
2560 }
2561 _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2562
2563 if (consumer && producer) {
2564 foreach_in_list(ir_instruction, node, consumer->ir) {
2565 ir_variable *const var = node->as_variable();
2566
2567 if (var && var->data.mode == ir_var_shader_in &&
2568 var->data.is_unmatched_generic_inout) {
2569 if (!prog->IsES && prog->data->Version <= 120) {
2570 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
2571 *
2572 * Only those varying variables used (i.e. read) in
2573 * the fragment shader executable must be written to
2574 * by the vertex shader executable; declaring
2575 * superfluous varying variables in a vertex shader is
2576 * permissible.
2577 *
2578 * We interpret this text as meaning that the VS must
2579 * write the variable for the FS to read it. See
2580 * "glsl1-varying read but not written" in piglit.
2581 */
2582 linker_error(prog, "%s shader varying %s not written "
2583 "by %s shader\n.",
2584 _mesa_shader_stage_to_string(consumer->Stage),
2585 var->name,
2586 _mesa_shader_stage_to_string(producer->Stage));
2587 } else {
2588 linker_warning(prog, "%s shader varying %s not written "
2589 "by %s shader\n.",
2590 _mesa_shader_stage_to_string(consumer->Stage),
2591 var->name,
2592 _mesa_shader_stage_to_string(producer->Stage));
2593 }
2594 }
2595 }
2596
2597 /* Now that validation is done its safe to remove unused varyings. As
2598 * we have both a producer and consumer its safe to remove unused
2599 * varyings even if the program is a SSO because the stages are being
2600 * linked together i.e. we have a multi-stage SSO.
2601 */
2602 remove_unused_shader_inputs_and_outputs(false, producer,
2603 ir_var_shader_out);
2604 remove_unused_shader_inputs_and_outputs(false, consumer,
2605 ir_var_shader_in);
2606 }
2607
2608 if (producer) {
2609 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
2610 0, producer, disable_varying_packing,
2611 xfb_enabled);
2612 }
2613
2614 if (consumer) {
2615 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
2616 consumer_vertices, consumer,
2617 disable_varying_packing, xfb_enabled);
2618 }
2619
2620 return true;
2621 }
2622
2623 static bool
2624 check_against_output_limit(struct gl_context *ctx,
2625 struct gl_shader_program *prog,
2626 gl_linked_shader *producer,
2627 unsigned num_explicit_locations)
2628 {
2629 unsigned output_vectors = num_explicit_locations;
2630
2631 foreach_in_list(ir_instruction, node, producer->ir) {
2632 ir_variable *const var = node->as_variable();
2633
2634 if (var && !var->data.explicit_location &&
2635 var->data.mode == ir_var_shader_out &&
2636 var_counts_against_varying_limit(producer->Stage, var)) {
2637 /* outputs for fragment shader can't be doubles */
2638 output_vectors += var->type->count_attribute_slots(false);
2639 }
2640 }
2641
2642 assert(producer->Stage != MESA_SHADER_FRAGMENT);
2643 unsigned max_output_components =
2644 ctx->Const.Program[producer->Stage].MaxOutputComponents;
2645
2646 const unsigned output_components = output_vectors * 4;
2647 if (output_components > max_output_components) {
2648 if (ctx->API == API_OPENGLES2 || prog->IsES)
2649 linker_error(prog, "%s shader uses too many output vectors "
2650 "(%u > %u)\n",
2651 _mesa_shader_stage_to_string(producer->Stage),
2652 output_vectors,
2653 max_output_components / 4);
2654 else
2655 linker_error(prog, "%s shader uses too many output components "
2656 "(%u > %u)\n",
2657 _mesa_shader_stage_to_string(producer->Stage),
2658 output_components,
2659 max_output_components);
2660
2661 return false;
2662 }
2663
2664 return true;
2665 }
2666
2667 static bool
2668 check_against_input_limit(struct gl_context *ctx,
2669 struct gl_shader_program *prog,
2670 gl_linked_shader *consumer,
2671 unsigned num_explicit_locations)
2672 {
2673 unsigned input_vectors = num_explicit_locations;
2674
2675 foreach_in_list(ir_instruction, node, consumer->ir) {
2676 ir_variable *const var = node->as_variable();
2677
2678 if (var && !var->data.explicit_location &&
2679 var->data.mode == ir_var_shader_in &&
2680 var_counts_against_varying_limit(consumer->Stage, var)) {
2681 /* vertex inputs aren't varying counted */
2682 input_vectors += var->type->count_attribute_slots(false);
2683 }
2684 }
2685
2686 assert(consumer->Stage != MESA_SHADER_VERTEX);
2687 unsigned max_input_components =
2688 ctx->Const.Program[consumer->Stage].MaxInputComponents;
2689
2690 const unsigned input_components = input_vectors * 4;
2691 if (input_components > max_input_components) {
2692 if (ctx->API == API_OPENGLES2 || prog->IsES)
2693 linker_error(prog, "%s shader uses too many input vectors "
2694 "(%u > %u)\n",
2695 _mesa_shader_stage_to_string(consumer->Stage),
2696 input_vectors,
2697 max_input_components / 4);
2698 else
2699 linker_error(prog, "%s shader uses too many input components "
2700 "(%u > %u)\n",
2701 _mesa_shader_stage_to_string(consumer->Stage),
2702 input_components,
2703 max_input_components);
2704
2705 return false;
2706 }
2707
2708 return true;
2709 }
2710
2711 bool
2712 link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
2713 struct gl_context *ctx, void *mem_ctx)
2714 {
2715 bool has_xfb_qualifiers = false;
2716 unsigned num_tfeedback_decls = 0;
2717 char **varying_names = NULL;
2718 tfeedback_decl *tfeedback_decls = NULL;
2719
2720 /* From the ARB_enhanced_layouts spec:
2721 *
2722 * "If the shader used to record output variables for transform feedback
2723 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
2724 * qualifiers, the values specified by TransformFeedbackVaryings are
2725 * ignored, and the set of variables captured for transform feedback is
2726 * instead derived from the specified layout qualifiers."
2727 */
2728 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
2729 /* Find last stage before fragment shader */
2730 if (prog->_LinkedShaders[i]) {
2731 has_xfb_qualifiers =
2732 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
2733 prog, &num_tfeedback_decls,
2734 &varying_names);
2735 break;
2736 }
2737 }
2738
2739 if (!has_xfb_qualifiers) {
2740 num_tfeedback_decls = prog->TransformFeedback.NumVarying;
2741 varying_names = prog->TransformFeedback.VaryingNames;
2742 }
2743
2744 if (num_tfeedback_decls != 0) {
2745 /* From GL_EXT_transform_feedback:
2746 * A program will fail to link if:
2747 *
2748 * * the <count> specified by TransformFeedbackVaryingsEXT is
2749 * non-zero, but the program object has no vertex or geometry
2750 * shader;
2751 */
2752 if (first >= MESA_SHADER_FRAGMENT) {
2753 linker_error(prog, "Transform feedback varyings specified, but "
2754 "no vertex, tessellation, or geometry shader is "
2755 "present.\n");
2756 return false;
2757 }
2758
2759 tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
2760 num_tfeedback_decls);
2761 if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
2762 varying_names, tfeedback_decls))
2763 return false;
2764 }
2765
2766 /* If there is no fragment shader we need to set transform feedback.
2767 *
2768 * For SSO we also need to assign output locations. We assign them here
2769 * because we need to do it for both single stage programs and multi stage
2770 * programs.
2771 */
2772 if (last < MESA_SHADER_FRAGMENT &&
2773 (num_tfeedback_decls != 0 || prog->SeparateShader)) {
2774 const uint64_t reserved_out_slots =
2775 reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
2776 if (!assign_varying_locations(ctx, mem_ctx, prog,
2777 prog->_LinkedShaders[last], NULL,
2778 num_tfeedback_decls, tfeedback_decls,
2779 reserved_out_slots))
2780 return false;
2781 }
2782
2783 if (last <= MESA_SHADER_FRAGMENT) {
2784 /* Remove unused varyings from the first/last stage unless SSO */
2785 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2786 prog->_LinkedShaders[first],
2787 ir_var_shader_in);
2788 remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2789 prog->_LinkedShaders[last],
2790 ir_var_shader_out);
2791
2792 /* If the program is made up of only a single stage */
2793 if (first == last) {
2794 gl_linked_shader *const sh = prog->_LinkedShaders[last];
2795
2796 do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
2797 do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
2798 tfeedback_decls);
2799
2800 if (prog->SeparateShader) {
2801 const uint64_t reserved_slots =
2802 reserved_varying_slot(sh, ir_var_shader_in);
2803
2804 /* Assign input locations for SSO, output locations are already
2805 * assigned.
2806 */
2807 if (!assign_varying_locations(ctx, mem_ctx, prog,
2808 NULL /* producer */,
2809 sh /* consumer */,
2810 0 /* num_tfeedback_decls */,
2811 NULL /* tfeedback_decls */,
2812 reserved_slots))
2813 return false;
2814 }
2815 } else {
2816 /* Linking the stages in the opposite order (from fragment to vertex)
2817 * ensures that inter-shader outputs written to in an earlier stage
2818 * are eliminated if they are (transitively) not used in a later
2819 * stage.
2820 */
2821 int next = last;
2822 for (int i = next - 1; i >= 0; i--) {
2823 if (prog->_LinkedShaders[i] == NULL && i != 0)
2824 continue;
2825
2826 gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
2827 gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
2828
2829 const uint64_t reserved_out_slots =
2830 reserved_varying_slot(sh_i, ir_var_shader_out);
2831 const uint64_t reserved_in_slots =
2832 reserved_varying_slot(sh_next, ir_var_shader_in);
2833
2834 do_dead_builtin_varyings(ctx, sh_i, sh_next,
2835 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2836 tfeedback_decls);
2837
2838 if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
2839 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2840 tfeedback_decls,
2841 reserved_out_slots | reserved_in_slots))
2842 return false;
2843
2844 /* This must be done after all dead varyings are eliminated. */
2845 if (sh_i != NULL) {
2846 unsigned slots_used = _mesa_bitcount_64(reserved_out_slots);
2847 if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
2848 return false;
2849 }
2850 }
2851
2852 unsigned slots_used = _mesa_bitcount_64(reserved_in_slots);
2853 if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
2854 return false;
2855
2856 next = i;
2857 }
2858 }
2859 }
2860
2861 if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
2862 has_xfb_qualifiers))
2863 return false;
2864
2865 return true;
2866 }