Merge remote-tracking branch 'public/master' into vulkan
[mesa.git] / src / compiler / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
43 namespace {
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   /* Visitor state starts with no current struct field or variable; both
    * are filled in per-dereference by setup_for_load_or_store().
    */
   lower_ubo_reference_visitor(struct gl_shader *shader)
   : shader(shader), struct_field(NULL), variable(NULL)
   {
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /* Compute the block index (this->uniform_block) and the byte offset
    * (run-time *offset plus compile-time *const_offset) for a dereference
    * of a variable inside a UBO/SSBO, along with layout information
    * (*row_major, *matrix_columns) needed to emit the access.
    */
   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                unsigned packing);
   /* Bitmask of ACCESS_{COHERENT,RESTRICT,VOLATILE} for the current
    * SSBO variable (or struct field, for interface instances).
    */
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* Which kind of buffer access is currently being lowered; consulted by
    * setup_for_load_or_store() (UBO vs. SSBO block lists) and by
    * insert_buffer_access().
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           unsigned packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   struct gl_shader *shader;
   struct gl_uniform_buffer_variable *ubo_var;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   /* Block index expression for the access currently being lowered;
    * may be non-constant for indirectly indexed block arrays.
    */
   ir_rvalue *uniform_block;
   /* Set whenever a lowering was performed; drives the fixed-point loop
    * in lower_ubo_reference().
    */
   bool progress;
};
113
/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 *
 * \param base_name            name of the interface type
 * \param d                    dereference chain being lowered
 * \param nonconst_block_index on return, the run-time part of the block
 *                             array index, or NULL if fully constant.
 *                             Non-constant subscripts are replaced by "[0]"
 *                             in the returned name.
 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: walk from the (possibly adjusted) start of the chain
    * down to the variable, building the block name with array subscripts.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         /* Only an array-typed interface instance keeps the subscripted
          * copy; otherwise the plain interface name identifies the block
          * and any collected index expression is discarded.
          */
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         /* Save whatever already follows the insertion point; it is
          * re-appended after the new subscript below.
          */
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            /* Scale an outer AoA index by the total size of the inner
             * dimensions so the flattened index is correct.
             */
            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            /* Non-constant subscripts appear as "[0]" in the name. */
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}
244
/* Resolve the block index and starting offset for a dereference of \p var.
 * Fills this->uniform_block, this->ubo_var and this->struct_field, and
 * returns the offset/layout information through the out parameters.
 */
void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     unsigned packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Locate the block by interface name; SSBO-style accesses search the
    * shader-storage block list, everything else the uniform block list.
    */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->NumShaderStorageBlocks;
      blocks = shader->ShaderStorageBlocks;
   } else {
      num_blocks = shader->NumUniformBlocks;
      blocks = shader->UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* A run-time block-array subscript is added on top of the
          * constant index of the first matching block.
          */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         this->ubo_var = var->is_interface_instance()
            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];

         break;
      }
   }

   assert(this->uniform_block);

   *const_offset = ubo_var->Offset;

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
}
298
/* Replace a read of a UBO/SSBO variable with loads into a temporary and a
 * dereference of that temporary. No-op for rvalues that are not
 * dereferences of buffer-block variables.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Materialize the run-time offset once so emit_access() can reuse it. */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
357
358 ir_expression *
359 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
360 const glsl_type *type,
361 ir_rvalue *offset)
362 {
363 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
364 return new(mem_ctx)
365 ir_expression(ir_binop_ubo_load,
366 type,
367 block_ref,
368 offset);
369
370 }
371
372 static bool
373 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
374 {
375 return state->ARB_shader_storage_buffer_object_enable ||
376 state->is_version(430, 310);
377 }
378
379 uint32_t
380 lower_ubo_reference_visitor::ssbo_access_params()
381 {
382 assert(variable);
383
384 if (variable->is_interface_instance()) {
385 assert(struct_field);
386
387 return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
388 (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
389 (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
390 } else {
391 return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
392 (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
393 (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
394 }
395 }
396
/* Emit a call to the __intrinsic_store_ssbo internal intrinsic that writes
 * \p deref's value to the current block at \p offset, under \p write_mask.
 * The signature is (block_ref, offset, value, write_mask, access).
 */
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   /* Build the intrinsic's formal parameter list. */
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value" , ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   /* Actual arguments, in the same order as the formals above. */
   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
442
/* Emit a call to the __intrinsic_load_ssbo internal intrinsic that reads
 * \p type worth of data from the current block at \p offset. The result
 * lands in a fresh "ssbo_load_result" temporary (the call's return deref).
 */
ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   /* Build the intrinsic's formal parameter list. */
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   /* Temporary that receives the loaded value. */
   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
484
/* Callback from emit_access(): emit one vec4-or-smaller load or store,
 * dispatching on the current buffer_access_type. \p mask is the write
 * mask; \p channel selects a single component for per-channel SSBO
 * stores (negative means "whole value").
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      /* The intrinsic call must precede the assignment that consumes its
       * return temporary.
       */
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores go after base_ir so the temporary holds its final value. */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
521
/* Emit the SSBO stores that copy \p write_var's contents back into the
 * buffer location that \p deref names, honoring \p write_mask.
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}
560
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   /* Replace any ssbo_unsized_array_length sub-expressions first, then
    * lower the remaining rvalues in this tree.
    */
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}
567
568 ir_expression *
569 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
570 {
571 if (expr->operation !=
572 ir_expression_operation(ir_unop_ssbo_unsized_array_length))
573 return NULL;
574
575 ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
576 if (!rvalue ||
577 !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
578 return NULL;
579
580 ir_dereference *deref = expr->operands[0]->as_dereference();
581 if (!deref)
582 return NULL;
583
584 ir_variable *var = expr->operands[0]->variable_referenced();
585 if (!var || !var->is_in_shader_storage_block())
586 return NULL;
587 return process_ssbo_unsized_array_length(&rvalue, deref, var);
588 }
589
/* Replace any direct ssbo_unsized_array_length operands of \p ir with
 * their lowered equivalents.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      /* Swap the original operand for the lowered length expression. */
      delete expr;
      ir->operands[i] = temp;
   }
}
614
615 void
616 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
617 {
618 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
619 return;
620
621 ir_expression *expr = (ir_expression *) ir->rhs;
622 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
623 if (!temp)
624 return;
625
626 delete expr;
627 ir->rhs = temp;
628 return;
629 }
630
631 ir_expression *
632 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
633 {
634 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
635 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
636 glsl_type::int_type,
637 block_ref);
638 }
639
/* Compute the per-element byte stride of the unsized array named by
 * \p deref under the given interface \p packing (std140 or std430).
 */
unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            unsigned packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         /* std140 rounds the element stride up to a vec4 boundary. */
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* Unsized array is always the last element of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         /* std140 rounds the element stride up to a vec4 boundary. */
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}
700
/* Build the expression tree implementing .length() for an unsized SSBO
 * array:
 *    max((buffer_size - offset_of_array) / stride_of_array, 0)
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   /* Total offset = run-time part + constant part (unsigned bytes)... */
   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* ...converted to signed to match the buffer-size query's type. */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   /* Clamp to zero in case the buffer is smaller than the array offset. */
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
746
/* If \p ir writes to an SSBO variable, redirect the assignment into a
 * temporary and emit the intrinsic stores that flush the temporary back
 * to buffer memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}
781
782 static bool
783 is_buffer_backed_variable(ir_variable *var)
784 {
785 return var->is_in_buffer_block() ||
786 var->data.mode == ir_var_shader_shared;
787 }
788
/* Split an array-to-array assignment whose RHS is buffer-backed into
 * per-element assignments, to avoid emitting all loads before all stores
 * (which would inflate register pressure). Returns true if the assignment
 * was split and removed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs[i] = rhs[i] per element; these are revisited and lowered
    * individually on a later pass.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
838
/* Split a struct-to-struct assignment whose RHS is buffer-backed into
 * per-field assignments, mirroring check_for_buffer_array_copy(). Returns
 * true if the assignment was split and removed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs.field = rhs.field per struct member. */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}
886
887 ir_visitor_status
888 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
889 {
890 /* Array and struct copies could involve large amounts of load/store
891 * operations. To improve register pressure we want to special-case
892 * these and split them into individual element copies.
893 * This way we avoid emitting all the loads for the RHS first and
894 * all the writes for the LHS second and register usage is more
895 * efficient.
896 */
897 if (check_for_buffer_array_copy(ir))
898 return visit_continue_with_parent;
899
900 if (check_for_buffer_struct_copy(ir))
901 return visit_continue_with_parent;
902
903 check_ssbo_unsized_array_length_assignment(ir);
904 check_for_ssbo_store(ir);
905 return rvalue_visit(ir);
906 }
907
908 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
909 * access to the buffer variable in the first parameter by an offset
910 * and block index. This involves creating the new internal intrinsic
911 * (i.e. the new function signature).
912 */
913 ir_call *
914 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
915 {
916 /* SSBO atomics usually have 2 parameters, the buffer variable and an
917 * integer argument. The exception is CompSwap, that has an additional
918 * integer parameter.
919 */
920 int param_count = ir->actual_parameters.length();
921 assert(param_count == 2 || param_count == 3);
922
923 /* First argument must be a scalar integer buffer variable */
924 exec_node *param = ir->actual_parameters.get_head();
925 ir_instruction *inst = (ir_instruction *) param;
926 assert(inst->ir_type == ir_type_dereference_variable ||
927 inst->ir_type == ir_type_dereference_array ||
928 inst->ir_type == ir_type_dereference_record ||
929 inst->ir_type == ir_type_swizzle);
930
931 ir_rvalue *deref = (ir_rvalue *) inst;
932 assert(deref->type->is_scalar() && deref->type->is_integer());
933
934 ir_variable *var = deref->variable_referenced();
935 assert(var);
936
937 /* Compute the offset to the start if the dereference and the
938 * block index
939 */
940 void *mem_ctx = ralloc_parent(shader->ir);
941
942 ir_rvalue *offset = NULL;
943 unsigned const_offset;
944 bool row_major;
945 int matrix_columns;
946 unsigned packing = var->get_interface_type()->interface_packing;
947
948 this->buffer_access_type = ssbo_atomic_access;
949 this->variable = var;
950
951 setup_for_load_or_store(mem_ctx, var, deref,
952 &offset, &const_offset,
953 &row_major, &matrix_columns,
954 packing);
955 assert(offset);
956 assert(!row_major);
957 assert(matrix_columns == 1);
958
959 ir_rvalue *deref_offset =
960 add(offset, new(mem_ctx) ir_constant(const_offset));
961 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
962
963 /* Create the new internal function signature that will take a block
964 * index and offset instead of a buffer variable
965 */
966 exec_list sig_params;
967 ir_variable *sig_param = new(mem_ctx)
968 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
969 sig_params.push_tail(sig_param);
970
971 sig_param = new(mem_ctx)
972 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
973 sig_params.push_tail(sig_param);
974
975 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
976 glsl_type::int_type : glsl_type::uint_type;
977 sig_param = new(mem_ctx)
978 ir_variable(type, "data1", ir_var_function_in);
979 sig_params.push_tail(sig_param);
980
981 if (param_count == 3) {
982 sig_param = new(mem_ctx)
983 ir_variable(type, "data2", ir_var_function_in);
984 sig_params.push_tail(sig_param);
985 }
986
987 ir_function_signature *sig =
988 new(mem_ctx) ir_function_signature(deref->type,
989 shader_storage_buffer_object);
990 assert(sig);
991 sig->replace_parameters(&sig_params);
992 sig->is_intrinsic = true;
993
994 char func_name[64];
995 sprintf(func_name, "%s_ssbo", ir->callee_name());
996 ir_function *f = new(mem_ctx) ir_function(func_name);
997 f->add_signature(sig);
998
999 /* Now, create the call to the internal intrinsic */
1000 exec_list call_params;
1001 call_params.push_tail(block_index);
1002 call_params.push_tail(deref_offset);
1003 param = ir->actual_parameters.get_head()->get_next();
1004 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1005 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1006 if (param_count == 3) {
1007 param = param->get_next();
1008 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1009 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1010 }
1011 ir_dereference_variable *return_deref =
1012 ir->return_deref->clone(mem_ctx, NULL);
1013 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1014 }
1015
1016 ir_call *
1017 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1018 {
1019 exec_list& params = ir->actual_parameters;
1020
1021 if (params.length() < 2 || params.length() > 3)
1022 return ir;
1023
1024 ir_rvalue *rvalue =
1025 ((ir_instruction *) params.get_head())->as_rvalue();
1026 if (!rvalue)
1027 return ir;
1028
1029 ir_variable *var = rvalue->variable_referenced();
1030 if (!var || !var->is_in_shader_storage_block())
1031 return ir;
1032
1033 const char *callee = ir->callee_name();
1034 if (!strcmp("__intrinsic_atomic_add", callee) ||
1035 !strcmp("__intrinsic_atomic_min", callee) ||
1036 !strcmp("__intrinsic_atomic_max", callee) ||
1037 !strcmp("__intrinsic_atomic_and", callee) ||
1038 !strcmp("__intrinsic_atomic_or", callee) ||
1039 !strcmp("__intrinsic_atomic_xor", callee) ||
1040 !strcmp("__intrinsic_atomic_exchange", callee) ||
1041 !strcmp("__intrinsic_atomic_comp_swap", callee)) {
1042 return lower_ssbo_atomic_intrinsic(ir);
1043 }
1044
1045 return ir;
1046 }
1047
1048
1049 ir_visitor_status
1050 lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1051 {
1052 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1053 if (new_ir != ir) {
1054 progress = true;
1055 base_ir->replace_with(new_ir);
1056 return visit_continue_with_parent;
1057 }
1058
1059 return rvalue_visit(ir);
1060 }
1061
1062
1063 } /* unnamed namespace */
1064
/* Entry point: lower all UBO/SSBO variable references in \p shader's IR. */
void
lower_ubo_reference(struct gl_shader *shader)
{
   lower_ubo_reference_visitor v(shader);

   /* Iterate to a fixed point: lowering a dereference of a UBO array whose
    * index is itself a UBO dereference produces new instructions containing
    * cloned UBO dereferences, which in turn need lowering.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}