glsl: build ubo name and indexing offset for AoA
[mesa.git] / src / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "ir.h"
37 #include "ir_builder.h"
38 #include "ir_rvalue_visitor.h"
39 #include "main/macros.h"
40 #include "glsl_parser_extras.h"
41
42 using namespace ir_builder;
43
44 /**
45 * Determine if a thing being dereferenced is row-major
46 *
47 * There is some trickery here.
48 *
49 * If the thing being dereferenced is a member of uniform block \b without an
50 * instance name, then the name of the \c ir_variable is the field name of an
51 * interface type. If this field is row-major, then the thing referenced is
52 * row-major.
53 *
54 * If the thing being dereferenced is a member of uniform block \b with an
55 * instance name, then the last dereference in the tree will be an
56 * \c ir_dereference_record. If that record field is row-major, then the
57 * thing referenced is row-major.
58 */
59 static bool
60 is_dereferenced_thing_row_major(const ir_rvalue *deref)
61 {
62 bool matrix = false;
63 const ir_rvalue *ir = deref;
64
65 while (true) {
66 matrix = matrix || ir->type->without_array()->is_matrix();
67
68 switch (ir->ir_type) {
69 case ir_type_dereference_array: {
70 const ir_dereference_array *const array_deref =
71 (const ir_dereference_array *) ir;
72
73 ir = array_deref->array;
74 break;
75 }
76
77 case ir_type_dereference_record: {
78 const ir_dereference_record *const record_deref =
79 (const ir_dereference_record *) ir;
80
81 ir = record_deref->record;
82
83 const int idx = ir->type->field_index(record_deref->field);
84 assert(idx >= 0);
85
86 const enum glsl_matrix_layout matrix_layout =
87 glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
88
89 switch (matrix_layout) {
90 case GLSL_MATRIX_LAYOUT_INHERITED:
91 break;
92 case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
93 return false;
94 case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
95 return matrix || deref->type->without_array()->is_record();
96 }
97
98 break;
99 }
100
101 case ir_type_dereference_variable: {
102 const ir_dereference_variable *const var_deref =
103 (const ir_dereference_variable *) ir;
104
105 const enum glsl_matrix_layout matrix_layout =
106 glsl_matrix_layout(var_deref->var->data.matrix_layout);
107
108 switch (matrix_layout) {
109 case GLSL_MATRIX_LAYOUT_INHERITED:
110 assert(!matrix);
111 return false;
112 case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
113 return false;
114 case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
115 return matrix || deref->type->without_array()->is_record();
116 }
117
118 unreachable("invalid matrix layout");
119 break;
120 }
121
122 default:
123 return false;
124 }
125 }
126
127 /* The tree must have ended with a dereference that wasn't an
128 * ir_dereference_variable. That is invalid, and it should be impossible.
129 */
130 unreachable("invalid dereference tree");
131 return false;
132 }
133
134 namespace {
135 class lower_ubo_reference_visitor : public ir_rvalue_enter_visitor {
136 public:
137 lower_ubo_reference_visitor(struct gl_shader *shader)
138 : shader(shader)
139 {
140 }
141
142 void handle_rvalue(ir_rvalue **rvalue);
143 ir_visitor_status visit_enter(ir_assignment *ir);
144
145 void setup_for_load_or_store(ir_variable *var,
146 ir_rvalue *deref,
147 ir_rvalue **offset,
148 unsigned *const_offset,
149 bool *row_major,
150 int *matrix_columns,
151 unsigned packing);
152 ir_expression *ubo_load(const struct glsl_type *type,
153 ir_rvalue *offset);
154 ir_call *ssbo_load(const struct glsl_type *type,
155 ir_rvalue *offset);
156
157 void check_for_ssbo_store(ir_assignment *ir);
158 void write_to_memory(ir_dereference *deref,
159 ir_variable *var,
160 ir_variable *write_var,
161 unsigned write_mask);
162 ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset,
163 unsigned write_mask);
164
165 void emit_access(bool is_write, ir_dereference *deref,
166 ir_variable *base_offset, unsigned int deref_offset,
167 bool row_major, int matrix_columns,
168 unsigned packing, unsigned write_mask);
169
170 ir_visitor_status visit_enter(class ir_expression *);
171 ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
172 void check_ssbo_unsized_array_length_expression(class ir_expression *);
173 void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
174
175 ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
176 ir_dereference *,
177 ir_variable *);
178 ir_expression *emit_ssbo_get_buffer_size();
179
180 unsigned calculate_unsized_array_stride(ir_dereference *deref,
181 unsigned packing);
182
183 ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
184 ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
185 ir_visitor_status visit_enter(ir_call *ir);
186
187 void *mem_ctx;
188 struct gl_shader *shader;
189 struct gl_uniform_buffer_variable *ubo_var;
190 ir_rvalue *uniform_block;
191 bool progress;
192 bool is_shader_storage;
193 };
194
195 /**
196 * Determine the name of the interface block field
197 *
198 * This is the name of the specific member as it would appear in the
199 * \c gl_uniform_buffer_variable::Name field in the shader's
200 * \c UniformBlocks array.
201 */
202 static const char *
203 interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
204 ir_rvalue **nonconst_block_index)
205 {
206 *nonconst_block_index = NULL;
207 char *name_copy = NULL;
208 size_t base_length = 0;
209
210 /* Loop back through the IR until we find the uniform block */
211 ir_rvalue *ir = d;
212 while (ir != NULL) {
213 switch (ir->ir_type) {
214 case ir_type_dereference_variable: {
215 /* Exit loop */
216 ir = NULL;
217 break;
218 }
219
220 case ir_type_dereference_record: {
221 ir_dereference_record *r = (ir_dereference_record *) ir;
222 ir = r->record->as_dereference();
223
224 /* If we got here it means any previous array subscripts belong to
225 * block members and not the block itself so skip over them in the
226 * next pass.
227 */
228 d = ir;
229 break;
230 }
231
232 case ir_type_dereference_array: {
233 ir_dereference_array *a = (ir_dereference_array *) ir;
234 ir = a->array->as_dereference();
235 break;
236 }
237
238 case ir_type_swizzle: {
239 ir_swizzle *s = (ir_swizzle *) ir;
240 ir = s->val->as_dereference();
241 break;
242 }
243
244 default:
245 assert(!"Should not get here.");
246 break;
247 }
248 }
249
250 while (d != NULL) {
251 switch (d->ir_type) {
252 case ir_type_dereference_variable: {
253 ir_dereference_variable *v = (ir_dereference_variable *) d;
254 if (name_copy != NULL &&
255 v->var->is_interface_instance() &&
256 v->var->type->is_array()) {
257 return name_copy;
258 } else {
259 *nonconst_block_index = NULL;
260 return base_name;
261 }
262
263 break;
264 }
265
266 case ir_type_dereference_array: {
267 ir_dereference_array *a = (ir_dereference_array *) d;
268 size_t new_length;
269
270 if (name_copy == NULL) {
271 name_copy = ralloc_strdup(mem_ctx, base_name);
272 base_length = strlen(name_copy);
273 }
274
275 /* For arrays of arrays we start at the innermost array and work our
276 * way out so we need to insert the subscript at the base of the
277 * name string rather than just attaching it to the end.
278 */
279 new_length = base_length;
280 ir_constant *const_index = a->array_index->as_constant();
281 char *end = ralloc_strdup(NULL, &name_copy[new_length]);
282 if (!const_index) {
283 ir_rvalue *array_index = a->array_index;
284 if (array_index->type != glsl_type::uint_type)
285 array_index = i2u(array_index);
286
287 if (a->array->type->fields.array->is_array()) {
288 ir_constant *base_size = new(mem_ctx)
289 ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
290 array_index = mul(array_index, base_size);
291 }
292
293 if (*nonconst_block_index) {
294 *nonconst_block_index = add(*nonconst_block_index, array_index);
295 } else {
296 *nonconst_block_index = array_index;
297 }
298
299 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
300 end);
301 } else {
302 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
303 const_index->get_uint_component(0),
304 end);
305 }
306 ralloc_free(end);
307
308 d = a->array->as_dereference();
309
310 break;
311 }
312
313 default:
314 assert(!"Should not get here.");
315 break;
316 }
317 }
318
319 assert(!"Should not get here.");
320 return NULL;
321 }
322
323 void
324 lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
325 ir_rvalue *deref,
326 ir_rvalue **offset,
327 unsigned *const_offset,
328 bool *row_major,
329 int *matrix_columns,
330 unsigned packing)
331 {
332 /* Determine the name of the interface block */
333 ir_rvalue *nonconst_block_index;
334 const char *const field_name =
335 interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
336 deref, &nonconst_block_index);
337
338 /* Locate the block by interface name */
339 this->is_shader_storage = var->is_in_shader_storage_block();
340 unsigned num_blocks;
341 struct gl_uniform_block **blocks;
342 if (this->is_shader_storage) {
343 num_blocks = shader->NumShaderStorageBlocks;
344 blocks = shader->ShaderStorageBlocks;
345 } else {
346 num_blocks = shader->NumUniformBlocks;
347 blocks = shader->UniformBlocks;
348 }
349 this->uniform_block = NULL;
350 for (unsigned i = 0; i < num_blocks; i++) {
351 if (strcmp(field_name, blocks[i]->Name) == 0) {
352
353 ir_constant *index = new(mem_ctx) ir_constant(i);
354
355 if (nonconst_block_index) {
356 this->uniform_block = add(nonconst_block_index, index);
357 } else {
358 this->uniform_block = index;
359 }
360
361 this->ubo_var = var->is_interface_instance()
362 ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
363
364 break;
365 }
366 }
367
368 assert(this->uniform_block);
369
370 *offset = new(mem_ctx) ir_constant(0u);
371 *const_offset = 0;
372 *row_major = is_dereferenced_thing_row_major(deref);
373 *matrix_columns = 1;
374
375 /* Calculate the offset to the start of the region of the UBO
376 * dereferenced by *rvalue. This may be a variable offset if an
377 * array dereference has a variable index.
378 */
379 while (deref) {
380 switch (deref->ir_type) {
381 case ir_type_dereference_variable: {
382 *const_offset += ubo_var->Offset;
383 deref = NULL;
384 break;
385 }
386
387 case ir_type_dereference_array: {
388 ir_dereference_array *deref_array = (ir_dereference_array *) deref;
389 unsigned array_stride;
390 if (deref_array->array->type->is_matrix() && *row_major) {
391 /* When loading a vector out of a row major matrix, the
392 * step between the columns (vectors) is the size of a
393 * float, while the step between the rows (elements of a
394 * vector) is handled below in emit_ubo_loads.
395 */
396 array_stride = 4;
397 if (deref_array->array->type->is_double())
398 array_stride *= 2;
399 *matrix_columns = deref_array->array->type->matrix_columns;
400 } else if (deref_array->type->is_interface()) {
401 /* We're processing an array dereference of an interface instance
402 * array. The thing being dereferenced *must* be a variable
403 * dereference because interfaces cannot be embedded in other
404 * types. In terms of calculating the offsets for the lowering
405 * pass, we don't care about the array index. All elements of an
406 * interface instance array will have the same offsets relative to
407 * the base of the block that backs them.
408 */
409 assert(deref_array->array->as_dereference_variable());
410 deref = deref_array->array->as_dereference();
411 break;
412 } else {
413 /* Whether or not the field is row-major (because it might be a
414 * bvec2 or something) does not affect the array itself. We need
415 * to know whether an array element in its entirety is row-major.
416 */
417 const bool array_row_major =
418 is_dereferenced_thing_row_major(deref_array);
419
420 /* The array type will give the correct interface packing
421 * information
422 */
423 if (packing == GLSL_INTERFACE_PACKING_STD430) {
424 array_stride = deref_array->type->std430_array_stride(array_row_major);
425 } else {
426 array_stride = deref_array->type->std140_size(array_row_major);
427 array_stride = glsl_align(array_stride, 16);
428 }
429 }
430
431 ir_rvalue *array_index = deref_array->array_index;
432 if (array_index->type->base_type == GLSL_TYPE_INT)
433 array_index = i2u(array_index);
434
435 ir_constant *const_index =
436 array_index->constant_expression_value(NULL);
437 if (const_index) {
438 *const_offset += array_stride * const_index->value.u[0];
439 } else {
440 *offset = add(*offset,
441 mul(array_index,
442 new(mem_ctx) ir_constant(array_stride)));
443 }
444 deref = deref_array->array->as_dereference();
445 break;
446 }
447
448 case ir_type_dereference_record: {
449 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
450 const glsl_type *struct_type = deref_record->record->type;
451 unsigned intra_struct_offset = 0;
452
453 for (unsigned int i = 0; i < struct_type->length; i++) {
454 const glsl_type *type = struct_type->fields.structure[i].type;
455
456 ir_dereference_record *field_deref = new(mem_ctx)
457 ir_dereference_record(deref_record->record,
458 struct_type->fields.structure[i].name);
459 const bool field_row_major =
460 is_dereferenced_thing_row_major(field_deref);
461
462 ralloc_free(field_deref);
463
464 unsigned field_align = 0;
465
466 if (packing == GLSL_INTERFACE_PACKING_STD430)
467 field_align = type->std430_base_alignment(field_row_major);
468 else
469 field_align = type->std140_base_alignment(field_row_major);
470
471 intra_struct_offset = glsl_align(intra_struct_offset, field_align);
472
473 if (strcmp(struct_type->fields.structure[i].name,
474 deref_record->field) == 0)
475 break;
476
477 if (packing == GLSL_INTERFACE_PACKING_STD430)
478 intra_struct_offset += type->std430_size(field_row_major);
479 else
480 intra_struct_offset += type->std140_size(field_row_major);
481
482 /* If the field just examined was itself a structure, apply rule
483 * #9:
484 *
485 * "The structure may have padding at the end; the base offset
486 * of the member following the sub-structure is rounded up to
487 * the next multiple of the base alignment of the structure."
488 */
489 if (type->without_array()->is_record()) {
490 intra_struct_offset = glsl_align(intra_struct_offset,
491 field_align);
492
493 }
494 }
495
496 *const_offset += intra_struct_offset;
497 deref = deref_record->record->as_dereference();
498 break;
499 }
500
501 case ir_type_swizzle: {
502 ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
503
504 assert(deref_swizzle->mask.num_components == 1);
505
506 *const_offset += deref_swizzle->mask.x * sizeof(int);
507 deref = deref_swizzle->val->as_dereference();
508 break;
509 }
510
511 default:
512 assert(!"not reached");
513 deref = NULL;
514 break;
515 }
516 }
517 }
518
519 void
520 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
521 {
522 if (!*rvalue)
523 return;
524
525 ir_dereference *deref = (*rvalue)->as_dereference();
526 if (!deref)
527 return;
528
529 ir_variable *var = deref->variable_referenced();
530 if (!var || !var->is_in_buffer_block())
531 return;
532
533 mem_ctx = ralloc_parent(shader->ir);
534
535 ir_rvalue *offset = NULL;
536 unsigned const_offset;
537 bool row_major;
538 int matrix_columns;
539 unsigned packing = var->get_interface_type()->interface_packing;
540
541 /* Compute the offset to the start if the dereference as well as other
542 * information we need to configure the write
543 */
544 setup_for_load_or_store(var, deref,
545 &offset, &const_offset,
546 &row_major, &matrix_columns,
547 packing);
548 assert(offset);
549
550 /* Now that we've calculated the offset to the start of the
551 * dereference, walk over the type and emit loads into a temporary.
552 */
553 const glsl_type *type = (*rvalue)->type;
554 ir_variable *load_var = new(mem_ctx) ir_variable(type,
555 "ubo_load_temp",
556 ir_var_temporary);
557 base_ir->insert_before(load_var);
558
559 ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
560 "ubo_load_temp_offset",
561 ir_var_temporary);
562 base_ir->insert_before(load_offset);
563 base_ir->insert_before(assign(load_offset, offset));
564
565 deref = new(mem_ctx) ir_dereference_variable(load_var);
566 emit_access(false, deref, load_offset, const_offset,
567 row_major, matrix_columns, packing, 0);
568 *rvalue = deref;
569
570 progress = true;
571 }
572
573 ir_expression *
574 lower_ubo_reference_visitor::ubo_load(const glsl_type *type,
575 ir_rvalue *offset)
576 {
577 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
578 return new(mem_ctx)
579 ir_expression(ir_binop_ubo_load,
580 type,
581 block_ref,
582 offset);
583
584 }
585
586 static bool
587 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
588 {
589 return state->ARB_shader_storage_buffer_object_enable;
590 }
591
592 ir_call *
593 lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
594 ir_rvalue *offset,
595 unsigned write_mask)
596 {
597 exec_list sig_params;
598
599 ir_variable *block_ref = new(mem_ctx)
600 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
601 sig_params.push_tail(block_ref);
602
603 ir_variable *offset_ref = new(mem_ctx)
604 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
605 sig_params.push_tail(offset_ref);
606
607 ir_variable *val_ref = new(mem_ctx)
608 ir_variable(deref->type, "value" , ir_var_function_in);
609 sig_params.push_tail(val_ref);
610
611 ir_variable *writemask_ref = new(mem_ctx)
612 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
613 sig_params.push_tail(writemask_ref);
614
615 ir_function_signature *sig = new(mem_ctx)
616 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
617 assert(sig);
618 sig->replace_parameters(&sig_params);
619 sig->is_intrinsic = true;
620
621 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
622 f->add_signature(sig);
623
624 exec_list call_params;
625 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
626 call_params.push_tail(offset->clone(mem_ctx, NULL));
627 call_params.push_tail(deref->clone(mem_ctx, NULL));
628 call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
629 return new(mem_ctx) ir_call(sig, NULL, &call_params);
630 }
631
632 ir_call *
633 lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
634 ir_rvalue *offset)
635 {
636 exec_list sig_params;
637
638 ir_variable *block_ref = new(mem_ctx)
639 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
640 sig_params.push_tail(block_ref);
641
642 ir_variable *offset_ref = new(mem_ctx)
643 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
644 sig_params.push_tail(offset_ref);
645
646 ir_function_signature *sig =
647 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
648 assert(sig);
649 sig->replace_parameters(&sig_params);
650 sig->is_intrinsic = true;
651
652 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
653 f->add_signature(sig);
654
655 ir_variable *result = new(mem_ctx)
656 ir_variable(type, "ssbo_load_result", ir_var_temporary);
657 base_ir->insert_before(result);
658 ir_dereference_variable *deref_result = new(mem_ctx)
659 ir_dereference_variable(result);
660
661 exec_list call_params;
662 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
663 call_params.push_tail(offset->clone(mem_ctx, NULL));
664
665 return new(mem_ctx) ir_call(sig, deref_result, &call_params);
666 }
667
/**
 * Return a mask with the \p n lowest bits set, i.e. the write mask that
 * covers the first \p n components.
 */
static inline int
writemask_for_size(unsigned n)
{
   const int one_past_top = 1 << n;
   return one_past_top - 1;
}
673
674 /**
675 * Takes a deref and recursively calls itself to break the deref down to the
676 * point that the reads or writes generated are contiguous scalars or vectors.
677 */
678 void
679 lower_ubo_reference_visitor::emit_access(bool is_write,
680 ir_dereference *deref,
681 ir_variable *base_offset,
682 unsigned int deref_offset,
683 bool row_major,
684 int matrix_columns,
685 unsigned packing,
686 unsigned write_mask)
687 {
688 if (deref->type->is_record()) {
689 unsigned int field_offset = 0;
690
691 for (unsigned i = 0; i < deref->type->length; i++) {
692 const struct glsl_struct_field *field =
693 &deref->type->fields.structure[i];
694 ir_dereference *field_deref =
695 new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
696 field->name);
697
698 field_offset =
699 glsl_align(field_offset,
700 field->type->std140_base_alignment(row_major));
701
702 emit_access(is_write, field_deref, base_offset,
703 deref_offset + field_offset,
704 row_major, 1, packing,
705 writemask_for_size(field_deref->type->vector_elements));
706
707 field_offset += field->type->std140_size(row_major);
708 }
709 return;
710 }
711
712 if (deref->type->is_array()) {
713 unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
714 deref->type->fields.array->std430_array_stride(row_major) :
715 glsl_align(deref->type->fields.array->std140_size(row_major), 16);
716
717 for (unsigned i = 0; i < deref->type->length; i++) {
718 ir_constant *element = new(mem_ctx) ir_constant(i);
719 ir_dereference *element_deref =
720 new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
721 element);
722 emit_access(is_write, element_deref, base_offset,
723 deref_offset + i * array_stride,
724 row_major, 1, packing,
725 writemask_for_size(element_deref->type->vector_elements));
726 }
727 return;
728 }
729
730 if (deref->type->is_matrix()) {
731 for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
732 ir_constant *col = new(mem_ctx) ir_constant(i);
733 ir_dereference *col_deref =
734 new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
735
736 if (row_major) {
737 /* For a row-major matrix, the next column starts at the next
738 * element.
739 */
740 int size_mul = deref->type->is_double() ? 8 : 4;
741 emit_access(is_write, col_deref, base_offset,
742 deref_offset + i * size_mul,
743 row_major, deref->type->matrix_columns, packing,
744 writemask_for_size(col_deref->type->vector_elements));
745 } else {
746 int size_mul;
747
748 /* std430 doesn't round up vec2 size to a vec4 size */
749 if (packing == GLSL_INTERFACE_PACKING_STD430 &&
750 deref->type->vector_elements == 2 &&
751 !deref->type->is_double()) {
752 size_mul = 8;
753 } else {
754 /* std140 always rounds the stride of arrays (and matrices) to a
755 * vec4, so matrices are always 16 between columns/rows. With
756 * doubles, they will be 32 apart when there are more than 2 rows.
757 *
758 * For both std140 and std430, if the member is a
759 * three-'component vector with components consuming N basic
760 * machine units, the base alignment is 4N. For vec4, base
761 * alignment is 4N.
762 */
763 size_mul = (deref->type->is_double() &&
764 deref->type->vector_elements > 2) ? 32 : 16;
765 }
766
767 emit_access(is_write, col_deref, base_offset,
768 deref_offset + i * size_mul,
769 row_major, deref->type->matrix_columns, packing,
770 writemask_for_size(col_deref->type->vector_elements));
771 }
772 }
773 return;
774 }
775
776 assert(deref->type->is_scalar() || deref->type->is_vector());
777
778 if (!row_major) {
779 ir_rvalue *offset =
780 add(base_offset, new(mem_ctx) ir_constant(deref_offset));
781 if (is_write)
782 base_ir->insert_after(ssbo_store(deref, offset, write_mask));
783 else {
784 if (!this->is_shader_storage) {
785 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
786 ubo_load(deref->type, offset)));
787 } else {
788 ir_call *load_ssbo = ssbo_load(deref->type, offset);
789 base_ir->insert_before(load_ssbo);
790 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
791 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value));
792 }
793 }
794 } else {
795 unsigned N = deref->type->is_double() ? 8 : 4;
796
797 /* We're dereffing a column out of a row-major matrix, so we
798 * gather the vector from each stored row.
799 */
800 assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
801 deref->type->base_type == GLSL_TYPE_DOUBLE);
802 /* Matrices, row_major or not, are stored as if they were
803 * arrays of vectors of the appropriate size in std140.
804 * Arrays have their strides rounded up to a vec4, so the
805 * matrix stride is always 16. However a double matrix may either be 16
806 * or 32 depending on the number of columns.
807 */
808 assert(matrix_columns <= 4);
809 unsigned matrix_stride = 0;
810 /* Matrix stride for std430 mat2xY matrices are not rounded up to
811 * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform
812 * Block Layout":
813 *
814 * "2. If the member is a two- or four-component vector with components
815 * consuming N basic machine units, the base alignment is 2N or 4N,
816 * respectively." [...]
817 * "4. If the member is an array of scalars or vectors, the base alignment
818 * and array stride are set to match the base alignment of a single array
819 * element, according to rules (1), (2), and (3), and rounded up to the
820 * base alignment of a vec4." [...]
821 * "7. If the member is a row-major matrix with C columns and R rows, the
822 * matrix is stored identically to an array of R row vectors with C
823 * components each, according to rule (4)." [...]
824 * "When using the std430 storage layout, shader storage blocks will be
825 * laid out in buffer storage identically to uniform and shader storage
826 * blocks using the std140 layout, except that the base alignment and
827 * stride of arrays of scalars and vectors in rule 4 and of structures in
828 * rule 9 are not rounded up a multiple of the base alignment of a vec4."
829 */
830 if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2)
831 matrix_stride = 2 * N;
832 else
833 matrix_stride = glsl_align(matrix_columns * N, 16);
834
835 const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
836 glsl_type::float_type : glsl_type::double_type;
837
838 for (unsigned i = 0; i < deref->type->vector_elements; i++) {
839 ir_rvalue *chan_offset =
840 add(base_offset,
841 new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
842 if (is_write) {
843 /* If the component is not in the writemask, then don't
844 * store any value.
845 */
846 if (!((1 << i) & write_mask))
847 continue;
848
849 base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
850 } else {
851 if (!this->is_shader_storage) {
852 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
853 ubo_load(deref_type, chan_offset),
854 (1U << i)));
855 } else {
856 ir_call *load_ssbo = ssbo_load(deref_type, chan_offset);
857 base_ir->insert_before(load_ssbo);
858 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
859 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
860 value,
861 (1U << i)));
862 }
863 }
864 }
865 }
866 }
867
868 void
869 lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
870 ir_variable *var,
871 ir_variable *write_var,
872 unsigned write_mask)
873 {
874 ir_rvalue *offset = NULL;
875 unsigned const_offset;
876 bool row_major;
877 int matrix_columns;
878 unsigned packing = var->get_interface_type()->interface_packing;
879
880 /* Compute the offset to the start if the dereference as well as other
881 * information we need to configure the write
882 */
883 setup_for_load_or_store(var, deref,
884 &offset, &const_offset,
885 &row_major, &matrix_columns,
886 packing);
887 assert(offset);
888
889 /* Now emit writes from the temporary to memory */
890 ir_variable *write_offset =
891 new(mem_ctx) ir_variable(glsl_type::uint_type,
892 "ssbo_store_temp_offset",
893 ir_var_temporary);
894
895 base_ir->insert_before(write_offset);
896 base_ir->insert_before(assign(write_offset, offset));
897
898 deref = new(mem_ctx) ir_dereference_variable(write_var);
899 emit_access(true, deref, write_offset, const_offset,
900 row_major, matrix_columns, packing, write_mask);
901 }
902
903 ir_visitor_status
904 lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
905 {
906 check_ssbo_unsized_array_length_expression(ir);
907 return rvalue_visit(ir);
908 }
909
910 ir_expression *
911 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
912 {
913 if (expr->operation !=
914 ir_expression_operation(ir_unop_ssbo_unsized_array_length))
915 return NULL;
916
917 ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
918 if (!rvalue ||
919 !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
920 return NULL;
921
922 ir_dereference *deref = expr->operands[0]->as_dereference();
923 if (!deref)
924 return NULL;
925
926 ir_variable *var = expr->operands[0]->variable_referenced();
927 if (!var || !var->is_in_shader_storage_block())
928 return NULL;
929 return process_ssbo_unsized_array_length(&rvalue, deref, var);
930 }
931
932 void
933 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
934 {
935 if (ir->operation ==
936 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
937 /* Don't replace this unop if it is found alone. It is going to be
938 * removed by the optimization passes or replaced if it is part of
939 * an ir_assignment or another ir_expression.
940 */
941 return;
942 }
943
944 for (unsigned i = 0; i < ir->get_num_operands(); i++) {
945 if (ir->operands[i]->ir_type != ir_type_expression)
946 continue;
947 ir_expression *expr = (ir_expression *) ir->operands[i];
948 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
949 if (!temp)
950 continue;
951
952 delete expr;
953 ir->operands[i] = temp;
954 }
955 }
956
957 void
958 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
959 {
960 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
961 return;
962
963 ir_expression *expr = (ir_expression *) ir->rhs;
964 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
965 if (!temp)
966 return;
967
968 delete expr;
969 ir->rhs = temp;
970 return;
971 }
972
973 ir_expression *
974 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
975 {
976 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
977 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
978 glsl_type::int_type,
979 block_ref);
980 }
981
982 unsigned
983 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
984 unsigned packing)
985 {
986 unsigned array_stride = 0;
987
988 switch (deref->ir_type) {
989 case ir_type_dereference_variable:
990 {
991 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
992 const struct glsl_type *unsized_array_type = NULL;
993 /* An unsized array can be sized by other lowering passes, so pick
994 * the first field of the array which has the data type of the unsized
995 * array.
996 */
997 unsized_array_type = deref_var->var->type->fields.array;
998
999 /* Whether or not the field is row-major (because it might be a
1000 * bvec2 or something) does not affect the array itself. We need
1001 * to know whether an array element in its entirety is row-major.
1002 */
1003 const bool array_row_major =
1004 is_dereferenced_thing_row_major(deref_var);
1005
1006 if (packing == GLSL_INTERFACE_PACKING_STD430) {
1007 array_stride = unsized_array_type->std430_array_stride(array_row_major);
1008 } else {
1009 array_stride = unsized_array_type->std140_size(array_row_major);
1010 array_stride = glsl_align(array_stride, 16);
1011 }
1012 break;
1013 }
1014 case ir_type_dereference_record:
1015 {
1016 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
1017 ir_dereference *interface_deref =
1018 deref_record->record->as_dereference();
1019 assert(interface_deref != NULL);
1020 const struct glsl_type *interface_type = interface_deref->type;
1021 unsigned record_length = interface_type->length;
1022 /* Unsized array is always the last element of the interface */
1023 const struct glsl_type *unsized_array_type =
1024 interface_type->fields.structure[record_length - 1].type->fields.array;
1025
1026 const bool array_row_major =
1027 is_dereferenced_thing_row_major(deref_record);
1028
1029 if (packing == GLSL_INTERFACE_PACKING_STD430) {
1030 array_stride = unsized_array_type->std430_array_stride(array_row_major);
1031 } else {
1032 array_stride = unsized_array_type->std140_size(array_row_major);
1033 array_stride = glsl_align(array_stride, 16);
1034 }
1035 break;
1036 }
1037 default:
1038 unreachable("Unsupported dereference type");
1039 }
1040 return array_stride;
1041 }
1042
1043 ir_expression *
1044 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
1045 ir_dereference *deref,
1046 ir_variable *var)
1047 {
1048 mem_ctx = ralloc_parent(*rvalue);
1049
1050 ir_rvalue *base_offset = NULL;
1051 unsigned const_offset;
1052 bool row_major;
1053 int matrix_columns;
1054 unsigned packing = var->get_interface_type()->interface_packing;
1055 int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
1056
1057 /* Compute the offset to the start if the dereference as well as other
1058 * information we need to calculate the length.
1059 */
1060 setup_for_load_or_store(var, deref,
1061 &base_offset, &const_offset,
1062 &row_major, &matrix_columns,
1063 packing);
1064 /* array.length() =
1065 * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
1066 */
1067 ir_expression *buffer_size = emit_ssbo_get_buffer_size();
1068
1069 ir_expression *offset_of_array = new(mem_ctx)
1070 ir_expression(ir_binop_add, base_offset,
1071 new(mem_ctx) ir_constant(const_offset));
1072 ir_expression *offset_of_array_int = new(mem_ctx)
1073 ir_expression(ir_unop_u2i, offset_of_array);
1074
1075 ir_expression *sub = new(mem_ctx)
1076 ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
1077 ir_expression *div = new(mem_ctx)
1078 ir_expression(ir_binop_div, sub,
1079 new(mem_ctx) ir_constant(unsized_array_stride));
1080 ir_expression *max = new(mem_ctx)
1081 ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
1082
1083 return max;
1084 }
1085
1086 void
1087 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
1088 {
1089 if (!ir || !ir->lhs)
1090 return;
1091
1092 ir_rvalue *rvalue = ir->lhs->as_rvalue();
1093 if (!rvalue)
1094 return;
1095
1096 ir_dereference *deref = ir->lhs->as_dereference();
1097 if (!deref)
1098 return;
1099
1100 ir_variable *var = ir->lhs->variable_referenced();
1101 if (!var || !var->is_in_buffer_block())
1102 return;
1103
1104 /* We have a write to a buffer variable, so declare a temporary and rewrite
1105 * the assignment so that the temporary is the LHS.
1106 */
1107 mem_ctx = ralloc_parent(shader->ir);
1108
1109 const glsl_type *type = rvalue->type;
1110 ir_variable *write_var = new(mem_ctx) ir_variable(type,
1111 "ssbo_store_temp",
1112 ir_var_temporary);
1113 base_ir->insert_before(write_var);
1114 ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
1115
1116 /* Now we have to write the value assigned to the temporary back to memory */
1117 write_to_memory(deref, var, write_var, ir->write_mask);
1118 progress = true;
1119 }
1120
1121
1122 ir_visitor_status
1123 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
1124 {
1125 check_ssbo_unsized_array_length_assignment(ir);
1126 check_for_ssbo_store(ir);
1127 return rvalue_visit(ir);
1128 }
1129
1130 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
1131 * access to the buffer variable in the first parameter by an offset
1132 * and block index. This involves creating the new internal intrinsic
1133 * (i.e. the new function signature).
1134 */
1135 ir_call *
1136 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
1137 {
1138 /* SSBO atomics usually have 2 parameters, the buffer variable and an
1139 * integer argument. The exception is CompSwap, that has an additional
1140 * integer parameter.
1141 */
1142 int param_count = ir->actual_parameters.length();
1143 assert(param_count == 2 || param_count == 3);
1144
1145 /* First argument must be a scalar integer buffer variable */
1146 exec_node *param = ir->actual_parameters.get_head();
1147 ir_instruction *inst = (ir_instruction *) param;
1148 assert(inst->ir_type == ir_type_dereference_variable ||
1149 inst->ir_type == ir_type_dereference_array ||
1150 inst->ir_type == ir_type_dereference_record ||
1151 inst->ir_type == ir_type_swizzle);
1152
1153 ir_rvalue *deref = (ir_rvalue *) inst;
1154 assert(deref->type->is_scalar() && deref->type->is_integer());
1155
1156 ir_variable *var = deref->variable_referenced();
1157 assert(var);
1158
1159 /* Compute the offset to the start if the dereference and the
1160 * block index
1161 */
1162 mem_ctx = ralloc_parent(shader->ir);
1163
1164 ir_rvalue *offset = NULL;
1165 unsigned const_offset;
1166 bool row_major;
1167 int matrix_columns;
1168 unsigned packing = var->get_interface_type()->interface_packing;
1169
1170 setup_for_load_or_store(var, deref,
1171 &offset, &const_offset,
1172 &row_major, &matrix_columns,
1173 packing);
1174 assert(offset);
1175 assert(!row_major);
1176 assert(matrix_columns == 1);
1177
1178 ir_rvalue *deref_offset =
1179 add(offset, new(mem_ctx) ir_constant(const_offset));
1180 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
1181
1182 /* Create the new internal function signature that will take a block
1183 * index and offset instead of a buffer variable
1184 */
1185 exec_list sig_params;
1186 ir_variable *sig_param = new(mem_ctx)
1187 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
1188 sig_params.push_tail(sig_param);
1189
1190 sig_param = new(mem_ctx)
1191 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1192 sig_params.push_tail(sig_param);
1193
1194 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
1195 glsl_type::int_type : glsl_type::uint_type;
1196 sig_param = new(mem_ctx)
1197 ir_variable(type, "data1", ir_var_function_in);
1198 sig_params.push_tail(sig_param);
1199
1200 if (param_count == 3) {
1201 sig_param = new(mem_ctx)
1202 ir_variable(type, "data2", ir_var_function_in);
1203 sig_params.push_tail(sig_param);
1204 }
1205
1206 ir_function_signature *sig =
1207 new(mem_ctx) ir_function_signature(deref->type,
1208 shader_storage_buffer_object);
1209 assert(sig);
1210 sig->replace_parameters(&sig_params);
1211 sig->is_intrinsic = true;
1212
1213 char func_name[64];
1214 sprintf(func_name, "%s_internal", ir->callee_name());
1215 ir_function *f = new(mem_ctx) ir_function(func_name);
1216 f->add_signature(sig);
1217
1218 /* Now, create the call to the internal intrinsic */
1219 exec_list call_params;
1220 call_params.push_tail(block_index);
1221 call_params.push_tail(deref_offset);
1222 param = ir->actual_parameters.get_head()->get_next();
1223 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1224 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1225 if (param_count == 3) {
1226 param = param->get_next();
1227 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1228 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1229 }
1230 ir_dereference_variable *return_deref =
1231 ir->return_deref->clone(mem_ctx, NULL);
1232 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1233 }
1234
1235 ir_call *
1236 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1237 {
1238 const char *callee = ir->callee_name();
1239 if (!strcmp("__intrinsic_ssbo_atomic_add", callee) ||
1240 !strcmp("__intrinsic_ssbo_atomic_min", callee) ||
1241 !strcmp("__intrinsic_ssbo_atomic_max", callee) ||
1242 !strcmp("__intrinsic_ssbo_atomic_and", callee) ||
1243 !strcmp("__intrinsic_ssbo_atomic_or", callee) ||
1244 !strcmp("__intrinsic_ssbo_atomic_xor", callee) ||
1245 !strcmp("__intrinsic_ssbo_atomic_exchange", callee) ||
1246 !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) {
1247 return lower_ssbo_atomic_intrinsic(ir);
1248 }
1249
1250 return ir;
1251 }
1252
1253
1254 ir_visitor_status
1255 lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1256 {
1257 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1258 if (new_ir != ir) {
1259 progress = true;
1260 base_ir->replace_with(new_ir);
1261 return visit_continue_with_parent;
1262 }
1263
1264 return rvalue_visit(ir);
1265 }
1266
1267
1268 } /* unnamed namespace */
1269
1270 void
1271 lower_ubo_reference(struct gl_shader *shader, exec_list *instructions)
1272 {
1273 lower_ubo_reference_visitor v(shader);
1274
1275 /* Loop over the instructions lowering references, because we take
1276 * a deref of a UBO array using a UBO dereference as the index will
1277 * produce a collection of instructions all of which have cloned
1278 * UBO dereferences for that array index.
1279 */
1280 do {
1281 v.progress = false;
1282 visit_list_elements(&v, instructions);
1283 } while (v.progress);
1284 }