Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / compiler / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
namespace {
/**
 * IR visitor that rewrites dereferences of variables backed by uniform or
 * shader-storage buffer blocks into explicit buffer loads/stores (either
 * ir_binop_ubo_load expressions or __intrinsic_*_ssbo calls).
 *
 * Derives from lower_buffer_access, which supplies the offset/layout
 * computation (setup_buffer_access / emit_access).
 */
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_shader *shader)
   : shader(shader), struct_field(NULL), variable(NULL)
   {
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /* Resolve the block index and byte offset for a dereference before a
    * load or store is emitted.
    */
   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                unsigned packing);
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* Which kind of buffer access is currently being lowered; consulted by
    * insert_buffer_access() and setup_for_load_or_store().
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           unsigned packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   struct gl_shader *shader;
   /* Block member metadata for the variable currently being lowered. */
   struct gl_uniform_buffer_variable *ubo_var;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   /* Rvalue (possibly non-constant) selecting the bound block. */
   ir_rvalue *uniform_block;
   /* Set whenever a lowering happened; drives the fixpoint loop. */
   bool progress;
};
113
/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 *
 * For arrays of blocks indexed with a non-constant expression, the
 * accumulated index expression is returned through
 * \p nonconst_block_index and the name uses "[0]" placeholders.
 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: walk only the dereferences that apply to the block
    * itself, building up the subscripted name.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         /* Only an array-typed interface instance keeps the subscripted
          * copy; otherwise the plain base name identifies the block.
          */
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            /* Scale outer indices by the size of the inner dimensions. */
            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            /* Non-constant subscripts appear as "[0]" in the name. */
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}
244
/**
 * Locate the uniform/shader-storage block containing \p var and compute the
 * (possibly variable) block index and byte offset for \p deref.
 *
 * Sets this->uniform_block, this->ubo_var and this->struct_field as side
 * effects; the remaining layout data is returned through the out
 * parameters via setup_buffer_access().
 */
void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     unsigned packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Locate the block by interface name; SSBO accesses search the
    * shader-storage block list, everything else the UBO list.
    */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->NumShaderStorageBlocks;
      blocks = shader->ShaderStorageBlocks;
   } else {
      num_blocks = shader->NumUniformBlocks;
      blocks = shader->UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* A non-constant array-of-blocks subscript is added to the base
          * index of the matched block.
          */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         this->ubo_var = var->is_interface_instance()
            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];

         break;
      }
   }

   assert(this->uniform_block);

   *const_offset = ubo_var->Offset;

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
}
298
/**
 * Replace an rvalue dereference of a buffer-block variable with loads into
 * a freshly declared temporary, then point the rvalue at that temporary.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Materialize the runtime offset once so emit_access() can reuse it. */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
357
358 ir_expression *
359 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
360 const glsl_type *type,
361 ir_rvalue *offset)
362 {
363 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
364 return new(mem_ctx)
365 ir_expression(ir_binop_ubo_load,
366 type,
367 block_ref,
368 offset);
369
370 }
371
372 static bool
373 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
374 {
375 return state->ARB_shader_storage_buffer_object_enable;
376 }
377
378 uint32_t
379 lower_ubo_reference_visitor::ssbo_access_params()
380 {
381 assert(variable);
382
383 if (variable->is_interface_instance()) {
384 assert(struct_field);
385
386 return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
387 (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
388 (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
389 } else {
390 return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
391 (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
392 (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
393 }
394 }
395
/**
 * Build a call to the __intrinsic_store_ssbo internal intrinsic that
 * writes \p deref's value at \p offset with the given component
 * \p write_mask.
 *
 * A fresh intrinsic signature (block_ref, offset, value, write_mask,
 * access) is created for each call; the caller inserts the returned
 * ir_call into the instruction stream.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   /* The value parameter takes the type of the thing being stored. */
   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value" , ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   /* Actual arguments mirror the signature parameters, in order. */
   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
441
/**
 * Build a call to the __intrinsic_load_ssbo internal intrinsic that reads a
 * value of \p type at \p offset, returning the result through a new
 * temporary ("ssbo_load_result") inserted before the current instruction.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   /* Temporary that receives the loaded value. */
   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
483
/**
 * Callback from lower_buffer_access::emit_access(): emit one load or store
 * for a (possibly swizzled) slice of the access, dispatching on the
 * current buffer_access_type.
 *
 * \param mask     write mask for the destination assignment
 * \param channel  single channel to store for row-major SSBO writes,
 *                 or negative to store with \p mask
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      /* SSBO loads go through an intrinsic call whose result is then
       * copied into the destination temporary.
       */
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores are inserted after base_ir so the RHS has been evaluated
       * into the temporary first.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
520
/**
 * Emit the SSBO stores that copy \p write_var (the temporary holding the
 * assigned value) back to the buffer location named by \p deref / \p var.
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}
559
/* Expression entry hook: replace any nested unsized-array .length()
 * operands before the generic rvalue lowering runs.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}
566
/**
 * If \p expr is a .length() query on an unsized array living in a shader
 * storage block, return the lowered length computation; otherwise return
 * NULL.
 */
ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   /* Operand must be an unsized-array rvalue... */
   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   /* ...that is a dereference... */
   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   /* ...of a variable in a shader storage block. */
   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}
588
/**
 * Scan \p ir's operands and replace any ir_unop_ssbo_unsized_array_length
 * sub-expressions with the computed length.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      /* Free the replaced operand and splice in the lowered length. */
      delete expr;
      ir->operands[i] = temp;
   }
}
613
614 void
615 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
616 {
617 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
618 return;
619
620 ir_expression *expr = (ir_expression *) ir->rhs;
621 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
622 if (!temp)
623 return;
624
625 delete expr;
626 ir->rhs = temp;
627 return;
628 }
629
630 ir_expression *
631 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
632 {
633 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
634 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
635 glsl_type::int_type,
636 block_ref);
637 }
638
639 unsigned
640 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
641 unsigned packing)
642 {
643 unsigned array_stride = 0;
644
645 switch (deref->ir_type) {
646 case ir_type_dereference_variable:
647 {
648 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
649 const struct glsl_type *unsized_array_type = NULL;
650 /* An unsized array can be sized by other lowering passes, so pick
651 * the first field of the array which has the data type of the unsized
652 * array.
653 */
654 unsized_array_type = deref_var->var->type->fields.array;
655
656 /* Whether or not the field is row-major (because it might be a
657 * bvec2 or something) does not affect the array itself. We need
658 * to know whether an array element in its entirety is row-major.
659 */
660 const bool array_row_major =
661 is_dereferenced_thing_row_major(deref_var);
662
663 if (packing == GLSL_INTERFACE_PACKING_STD430) {
664 array_stride = unsized_array_type->std430_array_stride(array_row_major);
665 } else {
666 array_stride = unsized_array_type->std140_size(array_row_major);
667 array_stride = glsl_align(array_stride, 16);
668 }
669 break;
670 }
671 case ir_type_dereference_record:
672 {
673 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
674 ir_dereference *interface_deref =
675 deref_record->record->as_dereference();
676 assert(interface_deref != NULL);
677 const struct glsl_type *interface_type = interface_deref->type;
678 unsigned record_length = interface_type->length;
679 /* Unsized array is always the last element of the interface */
680 const struct glsl_type *unsized_array_type =
681 interface_type->fields.structure[record_length - 1].type->fields.array;
682
683 const bool array_row_major =
684 is_dereferenced_thing_row_major(deref_record);
685
686 if (packing == GLSL_INTERFACE_PACKING_STD430) {
687 array_stride = unsized_array_type->std430_array_stride(array_row_major);
688 } else {
689 array_stride = unsized_array_type->std140_size(array_row_major);
690 array_stride = glsl_align(array_stride, 16);
691 }
692 break;
693 }
694 default:
695 unreachable("Unsupported dereference type");
696 }
697 return array_stride;
698 }
699
/**
 * Lower an unsized-array .length() into:
 *
 *   max((buffer_object_size - offset_of_array) / stride_of_array, 0)
 *
 * using the runtime buffer size from ir_unop_get_buffer_size.
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* Convert the unsigned byte offset to int to match buffer_size. */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   /* Clamp to zero in case the buffer is smaller than the array offset. */
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
745
/**
 * If \p ir assigns to a shader-storage-block variable, redirect the LHS to
 * a new temporary and emit the stores that copy the temporary to memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}
780
781 static bool
782 is_buffer_backed_variable(ir_variable *var)
783 {
784 return var->is_in_buffer_block() ||
785 var->data.mode == ir_var_shader_shared;
786 }
787
/**
 * Split an array copy whose RHS is buffer-backed into per-element
 * assignments, so loads and stores interleave instead of all loads being
 * emitted before all stores.
 *
 * \return true if the assignment was split and removed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs[i] = rhs[i] assignment per element, after the original. */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   /* The original whole-array assignment is now redundant. */
   ir->remove();
   progress = true;
   return true;
}
837
/**
 * Split a struct copy whose RHS is buffer-backed into per-field
 * assignments, for the same register-pressure reason as
 * check_for_buffer_array_copy().
 *
 * \return true if the assignment was split and removed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs.field = rhs.field assignment per field, after the
    * original.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   /* The original whole-struct assignment is now redundant. */
   ir->remove();
   progress = true;
   return true;
}
885
/**
 * Assignment entry hook: split buffer-backed aggregate copies, lower
 * .length() on the RHS, and redirect SSBO stores before the generic
 * rvalue lowering runs.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}
906
907 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
908 * access to the buffer variable in the first parameter by an offset
909 * and block index. This involves creating the new internal intrinsic
910 * (i.e. the new function signature).
911 */
912 ir_call *
913 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
914 {
915 /* SSBO atomics usually have 2 parameters, the buffer variable and an
916 * integer argument. The exception is CompSwap, that has an additional
917 * integer parameter.
918 */
919 int param_count = ir->actual_parameters.length();
920 assert(param_count == 2 || param_count == 3);
921
922 /* First argument must be a scalar integer buffer variable */
923 exec_node *param = ir->actual_parameters.get_head();
924 ir_instruction *inst = (ir_instruction *) param;
925 assert(inst->ir_type == ir_type_dereference_variable ||
926 inst->ir_type == ir_type_dereference_array ||
927 inst->ir_type == ir_type_dereference_record ||
928 inst->ir_type == ir_type_swizzle);
929
930 ir_rvalue *deref = (ir_rvalue *) inst;
931 assert(deref->type->is_scalar() && deref->type->is_integer());
932
933 ir_variable *var = deref->variable_referenced();
934 assert(var);
935
936 /* Compute the offset to the start if the dereference and the
937 * block index
938 */
939 void *mem_ctx = ralloc_parent(shader->ir);
940
941 ir_rvalue *offset = NULL;
942 unsigned const_offset;
943 bool row_major;
944 int matrix_columns;
945 unsigned packing = var->get_interface_type()->interface_packing;
946
947 this->buffer_access_type = ssbo_atomic_access;
948 this->variable = var;
949
950 setup_for_load_or_store(mem_ctx, var, deref,
951 &offset, &const_offset,
952 &row_major, &matrix_columns,
953 packing);
954 assert(offset);
955 assert(!row_major);
956 assert(matrix_columns == 1);
957
958 ir_rvalue *deref_offset =
959 add(offset, new(mem_ctx) ir_constant(const_offset));
960 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
961
962 /* Create the new internal function signature that will take a block
963 * index and offset instead of a buffer variable
964 */
965 exec_list sig_params;
966 ir_variable *sig_param = new(mem_ctx)
967 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
968 sig_params.push_tail(sig_param);
969
970 sig_param = new(mem_ctx)
971 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
972 sig_params.push_tail(sig_param);
973
974 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
975 glsl_type::int_type : glsl_type::uint_type;
976 sig_param = new(mem_ctx)
977 ir_variable(type, "data1", ir_var_function_in);
978 sig_params.push_tail(sig_param);
979
980 if (param_count == 3) {
981 sig_param = new(mem_ctx)
982 ir_variable(type, "data2", ir_var_function_in);
983 sig_params.push_tail(sig_param);
984 }
985
986 ir_function_signature *sig =
987 new(mem_ctx) ir_function_signature(deref->type,
988 shader_storage_buffer_object);
989 assert(sig);
990 sig->replace_parameters(&sig_params);
991 sig->is_intrinsic = true;
992
993 char func_name[64];
994 sprintf(func_name, "%s_ssbo", ir->callee_name());
995 ir_function *f = new(mem_ctx) ir_function(func_name);
996 f->add_signature(sig);
997
998 /* Now, create the call to the internal intrinsic */
999 exec_list call_params;
1000 call_params.push_tail(block_index);
1001 call_params.push_tail(deref_offset);
1002 param = ir->actual_parameters.get_head()->get_next();
1003 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1004 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1005 if (param_count == 3) {
1006 param = param->get_next();
1007 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1008 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1009 }
1010 ir_dereference_variable *return_deref =
1011 ir->return_deref->clone(mem_ctx, NULL);
1012 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1013 }
1014
1015 ir_call *
1016 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1017 {
1018 exec_list& params = ir->actual_parameters;
1019
1020 if (params.length() < 2 || params.length() > 3)
1021 return ir;
1022
1023 ir_rvalue *rvalue =
1024 ((ir_instruction *) params.get_head())->as_rvalue();
1025 if (!rvalue)
1026 return ir;
1027
1028 ir_variable *var = rvalue->variable_referenced();
1029 if (!var || !var->is_in_shader_storage_block())
1030 return ir;
1031
1032 const char *callee = ir->callee_name();
1033 if (!strcmp("__intrinsic_atomic_add", callee) ||
1034 !strcmp("__intrinsic_atomic_min", callee) ||
1035 !strcmp("__intrinsic_atomic_max", callee) ||
1036 !strcmp("__intrinsic_atomic_and", callee) ||
1037 !strcmp("__intrinsic_atomic_or", callee) ||
1038 !strcmp("__intrinsic_atomic_xor", callee) ||
1039 !strcmp("__intrinsic_atomic_exchange", callee) ||
1040 !strcmp("__intrinsic_atomic_comp_swap", callee)) {
1041 return lower_ssbo_atomic_intrinsic(ir);
1042 }
1043
1044 return ir;
1045 }
1046
1047
/**
 * Call entry hook: lower SSBO atomic intrinsics in place; otherwise fall
 * through to the generic rvalue lowering.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   /* check_for_ssbo_atomic_intrinsic returns a different call object only
    * when it lowered the intrinsic; swap it into the instruction stream.
    */
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}
1060
1061
1062 } /* unnamed namespace */
1063
/**
 * Entry point: run the UBO/SSBO reference-lowering pass over the shader's
 * instruction list until no further lowering occurs.
 */
void
lower_ubo_reference(struct gl_shader *shader)
{
   lower_ubo_reference_visitor v(shader);

   /* Loop over the instructions lowering references, because taking a
    * deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}
1078 }