2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file lower_ubo_reference.cpp
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
41 using namespace ir_builder
;
44 class lower_ubo_reference_visitor
:
45 public lower_buffer_access::lower_buffer_access
{
47 lower_ubo_reference_visitor(struct gl_shader
*shader
)
52 void handle_rvalue(ir_rvalue
**rvalue
);
53 ir_visitor_status
visit_enter(ir_assignment
*ir
);
55 void setup_for_load_or_store(void *mem_ctx
,
59 unsigned *const_offset
,
63 ir_expression
*ubo_load(void *mem_ctx
, const struct glsl_type
*type
,
65 ir_call
*ssbo_load(void *mem_ctx
, const struct glsl_type
*type
,
68 bool check_for_buffer_array_copy(ir_assignment
*ir
);
69 bool check_for_buffer_struct_copy(ir_assignment
*ir
);
70 void check_for_ssbo_store(ir_assignment
*ir
);
71 void write_to_memory(void *mem_ctx
, ir_dereference
*deref
, ir_variable
*var
,
72 ir_variable
*write_var
, unsigned write_mask
);
73 ir_call
*ssbo_store(void *mem_ctx
, ir_rvalue
*deref
, ir_rvalue
*offset
,
80 ssbo_unsized_array_length_access
,
84 void insert_buffer_access(void *mem_ctx
, ir_dereference
*deref
,
85 const glsl_type
*type
, ir_rvalue
*offset
,
86 unsigned mask
, int channel
);
88 ir_visitor_status
visit_enter(class ir_expression
*);
89 ir_expression
*calculate_ssbo_unsized_array_length(ir_expression
*expr
);
90 void check_ssbo_unsized_array_length_expression(class ir_expression
*);
91 void check_ssbo_unsized_array_length_assignment(ir_assignment
*ir
);
93 ir_expression
*process_ssbo_unsized_array_length(ir_rvalue
**,
96 ir_expression
*emit_ssbo_get_buffer_size(void *mem_ctx
);
98 unsigned calculate_unsized_array_stride(ir_dereference
*deref
,
101 ir_call
*lower_ssbo_atomic_intrinsic(ir_call
*ir
);
102 ir_call
*check_for_ssbo_atomic_intrinsic(ir_call
*ir
);
103 ir_visitor_status
visit_enter(ir_call
*ir
);
105 struct gl_shader
*shader
;
106 struct gl_uniform_buffer_variable
*ubo_var
;
107 ir_rvalue
*uniform_block
;
112 * Determine the name of the interface block field
114 * This is the name of the specific member as it would appear in the
115 * \c gl_uniform_buffer_variable::Name field in the shader's
116 * \c UniformBlocks array.
119 interface_field_name(void *mem_ctx
, char *base_name
, ir_rvalue
*d
,
120 ir_rvalue
**nonconst_block_index
)
122 *nonconst_block_index
= NULL
;
123 char *name_copy
= NULL
;
124 size_t base_length
= 0;
126 /* Loop back through the IR until we find the uniform block */
129 switch (ir
->ir_type
) {
130 case ir_type_dereference_variable
: {
136 case ir_type_dereference_record
: {
137 ir_dereference_record
*r
= (ir_dereference_record
*) ir
;
138 ir
= r
->record
->as_dereference();
140 /* If we got here it means any previous array subscripts belong to
141 * block members and not the block itself so skip over them in the
148 case ir_type_dereference_array
: {
149 ir_dereference_array
*a
= (ir_dereference_array
*) ir
;
150 ir
= a
->array
->as_dereference();
154 case ir_type_swizzle
: {
155 ir_swizzle
*s
= (ir_swizzle
*) ir
;
156 ir
= s
->val
->as_dereference();
157 /* Skip swizzle in the next pass */
163 assert(!"Should not get here.");
169 switch (d
->ir_type
) {
170 case ir_type_dereference_variable
: {
171 ir_dereference_variable
*v
= (ir_dereference_variable
*) d
;
172 if (name_copy
!= NULL
&&
173 v
->var
->is_interface_instance() &&
174 v
->var
->type
->is_array()) {
177 *nonconst_block_index
= NULL
;
184 case ir_type_dereference_array
: {
185 ir_dereference_array
*a
= (ir_dereference_array
*) d
;
188 if (name_copy
== NULL
) {
189 name_copy
= ralloc_strdup(mem_ctx
, base_name
);
190 base_length
= strlen(name_copy
);
193 /* For arrays of arrays we start at the innermost array and work our
194 * way out so we need to insert the subscript at the base of the
195 * name string rather than just attaching it to the end.
197 new_length
= base_length
;
198 ir_constant
*const_index
= a
->array_index
->as_constant();
199 char *end
= ralloc_strdup(NULL
, &name_copy
[new_length
]);
201 ir_rvalue
*array_index
= a
->array_index
;
202 if (array_index
->type
!= glsl_type::uint_type
)
203 array_index
= i2u(array_index
);
205 if (a
->array
->type
->is_array() &&
206 a
->array
->type
->fields
.array
->is_array()) {
207 ir_constant
*base_size
= new(mem_ctx
)
208 ir_constant(a
->array
->type
->fields
.array
->arrays_of_arrays_size());
209 array_index
= mul(array_index
, base_size
);
212 if (*nonconst_block_index
) {
213 *nonconst_block_index
= add(*nonconst_block_index
, array_index
);
215 *nonconst_block_index
= array_index
;
218 ralloc_asprintf_rewrite_tail(&name_copy
, &new_length
, "[0]%s",
221 ralloc_asprintf_rewrite_tail(&name_copy
, &new_length
, "[%d]%s",
222 const_index
->get_uint_component(0),
227 d
= a
->array
->as_dereference();
233 assert(!"Should not get here.");
238 assert(!"Should not get here.");
243 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx
,
247 unsigned *const_offset
,
252 /* Determine the name of the interface block */
253 ir_rvalue
*nonconst_block_index
;
254 const char *const field_name
=
255 interface_field_name(mem_ctx
, (char *) var
->get_interface_type()->name
,
256 deref
, &nonconst_block_index
);
258 /* Locate the block by interface name */
260 struct gl_uniform_block
**blocks
;
261 if (this->buffer_access_type
!= ubo_load_access
) {
262 num_blocks
= shader
->NumShaderStorageBlocks
;
263 blocks
= shader
->ShaderStorageBlocks
;
265 num_blocks
= shader
->NumUniformBlocks
;
266 blocks
= shader
->UniformBlocks
;
268 this->uniform_block
= NULL
;
269 for (unsigned i
= 0; i
< num_blocks
; i
++) {
270 if (strcmp(field_name
, blocks
[i
]->Name
) == 0) {
272 ir_constant
*index
= new(mem_ctx
) ir_constant(i
);
274 if (nonconst_block_index
) {
275 this->uniform_block
= add(nonconst_block_index
, index
);
277 this->uniform_block
= index
;
280 this->ubo_var
= var
->is_interface_instance()
281 ? &blocks
[i
]->Uniforms
[0] : &blocks
[i
]->Uniforms
[var
->data
.location
];
287 assert(this->uniform_block
);
289 *const_offset
= ubo_var
->Offset
;
291 setup_buffer_access(mem_ctx
, var
, deref
, offset
, const_offset
, row_major
,
292 matrix_columns
, packing
);
296 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue
**rvalue
)
301 ir_dereference
*deref
= (*rvalue
)->as_dereference();
305 ir_variable
*var
= deref
->variable_referenced();
306 if (!var
|| !var
->is_in_buffer_block())
309 void *mem_ctx
= ralloc_parent(shader
->ir
);
311 ir_rvalue
*offset
= NULL
;
312 unsigned const_offset
;
315 unsigned packing
= var
->get_interface_type()->interface_packing
;
317 this->buffer_access_type
=
318 var
->is_in_shader_storage_block() ?
319 ssbo_load_access
: ubo_load_access
;
321 /* Compute the offset to the start of the dereference as well as other
322 * information we need to configure the load
324 setup_for_load_or_store(mem_ctx
, var
, deref
,
325 &offset
, &const_offset
,
326 &row_major
, &matrix_columns
,
330 /* Now that we've calculated the offset to the start of the
331 * dereference, walk over the type and emit loads into a temporary.
333 const glsl_type
*type
= (*rvalue
)->type
;
334 ir_variable
*load_var
= new(mem_ctx
) ir_variable(type
,
337 base_ir
->insert_before(load_var
);
339 ir_variable
*load_offset
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
340 "ubo_load_temp_offset",
342 base_ir
->insert_before(load_offset
);
343 base_ir
->insert_before(assign(load_offset
, offset
));
345 deref
= new(mem_ctx
) ir_dereference_variable(load_var
);
346 emit_access(mem_ctx
, false, deref
, load_offset
, const_offset
,
347 row_major
, matrix_columns
, packing
, 0);
354 lower_ubo_reference_visitor::ubo_load(void *mem_ctx
,
355 const glsl_type
*type
,
358 ir_rvalue
*block_ref
= this->uniform_block
->clone(mem_ctx
, NULL
);
360 ir_expression(ir_binop_ubo_load
,
368 shader_storage_buffer_object(const _mesa_glsl_parse_state
*state
)
370 return state
->ARB_shader_storage_buffer_object_enable
;
374 lower_ubo_reference_visitor::ssbo_store(void *mem_ctx
,
379 exec_list sig_params
;
381 ir_variable
*block_ref
= new(mem_ctx
)
382 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
383 sig_params
.push_tail(block_ref
);
385 ir_variable
*offset_ref
= new(mem_ctx
)
386 ir_variable(glsl_type::uint_type
, "offset" , ir_var_function_in
);
387 sig_params
.push_tail(offset_ref
);
389 ir_variable
*val_ref
= new(mem_ctx
)
390 ir_variable(deref
->type
, "value" , ir_var_function_in
);
391 sig_params
.push_tail(val_ref
);
393 ir_variable
*writemask_ref
= new(mem_ctx
)
394 ir_variable(glsl_type::uint_type
, "write_mask" , ir_var_function_in
);
395 sig_params
.push_tail(writemask_ref
);
397 ir_function_signature
*sig
= new(mem_ctx
)
398 ir_function_signature(glsl_type::void_type
, shader_storage_buffer_object
);
400 sig
->replace_parameters(&sig_params
);
401 sig
->is_intrinsic
= true;
403 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_store_ssbo");
404 f
->add_signature(sig
);
406 exec_list call_params
;
407 call_params
.push_tail(this->uniform_block
->clone(mem_ctx
, NULL
));
408 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
409 call_params
.push_tail(deref
->clone(mem_ctx
, NULL
));
410 call_params
.push_tail(new(mem_ctx
) ir_constant(write_mask
));
411 return new(mem_ctx
) ir_call(sig
, NULL
, &call_params
);
415 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx
,
416 const struct glsl_type
*type
,
419 exec_list sig_params
;
421 ir_variable
*block_ref
= new(mem_ctx
)
422 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
423 sig_params
.push_tail(block_ref
);
425 ir_variable
*offset_ref
= new(mem_ctx
)
426 ir_variable(glsl_type::uint_type
, "offset_ref" , ir_var_function_in
);
427 sig_params
.push_tail(offset_ref
);
429 ir_function_signature
*sig
=
430 new(mem_ctx
) ir_function_signature(type
, shader_storage_buffer_object
);
432 sig
->replace_parameters(&sig_params
);
433 sig
->is_intrinsic
= true;
435 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_load_ssbo");
436 f
->add_signature(sig
);
438 ir_variable
*result
= new(mem_ctx
)
439 ir_variable(type
, "ssbo_load_result", ir_var_temporary
);
440 base_ir
->insert_before(result
);
441 ir_dereference_variable
*deref_result
= new(mem_ctx
)
442 ir_dereference_variable(result
);
444 exec_list call_params
;
445 call_params
.push_tail(this->uniform_block
->clone(mem_ctx
, NULL
));
446 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
448 return new(mem_ctx
) ir_call(sig
, deref_result
, &call_params
);
452 lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx
,
453 ir_dereference
*deref
,
454 const glsl_type
*type
,
459 switch (this->buffer_access_type
) {
460 case ubo_load_access
:
461 base_ir
->insert_before(assign(deref
->clone(mem_ctx
, NULL
),
462 ubo_load(mem_ctx
, type
, offset
),
465 case ssbo_load_access
: {
466 ir_call
*load_ssbo
= ssbo_load(mem_ctx
, type
, offset
);
467 base_ir
->insert_before(load_ssbo
);
468 ir_rvalue
*value
= load_ssbo
->return_deref
->as_rvalue()->clone(mem_ctx
, NULL
);
469 ir_assignment
*assignment
=
470 assign(deref
->clone(mem_ctx
, NULL
), value
, mask
);
471 base_ir
->insert_before(assignment
);
474 case ssbo_store_access
:
476 base_ir
->insert_after(ssbo_store(mem_ctx
,
477 swizzle(deref
, channel
, 1),
480 base_ir
->insert_after(ssbo_store(mem_ctx
, deref
, offset
, mask
));
484 unreachable("invalid buffer_access_type in insert_buffer_access");
489 lower_ubo_reference_visitor::write_to_memory(void *mem_ctx
,
490 ir_dereference
*deref
,
492 ir_variable
*write_var
,
495 ir_rvalue
*offset
= NULL
;
496 unsigned const_offset
;
499 unsigned packing
= var
->get_interface_type()->interface_packing
;
501 this->buffer_access_type
= ssbo_store_access
;
503 /* Compute the offset to the start of the dereference as well as other
504 * information we need to configure the write
506 setup_for_load_or_store(mem_ctx
, var
, deref
,
507 &offset
, &const_offset
,
508 &row_major
, &matrix_columns
,
512 /* Now emit writes from the temporary to memory */
513 ir_variable
*write_offset
=
514 new(mem_ctx
) ir_variable(glsl_type::uint_type
,
515 "ssbo_store_temp_offset",
518 base_ir
->insert_before(write_offset
);
519 base_ir
->insert_before(assign(write_offset
, offset
));
521 deref
= new(mem_ctx
) ir_dereference_variable(write_var
);
522 emit_access(mem_ctx
, true, deref
, write_offset
, const_offset
,
523 row_major
, matrix_columns
, packing
, write_mask
);
527 lower_ubo_reference_visitor::visit_enter(ir_expression
*ir
)
529 check_ssbo_unsized_array_length_expression(ir
);
530 return rvalue_visit(ir
);
534 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression
*expr
)
536 if (expr
->operation
!=
537 ir_expression_operation(ir_unop_ssbo_unsized_array_length
))
540 ir_rvalue
*rvalue
= expr
->operands
[0]->as_rvalue();
542 !rvalue
->type
->is_array() || !rvalue
->type
->is_unsized_array())
545 ir_dereference
*deref
= expr
->operands
[0]->as_dereference();
549 ir_variable
*var
= expr
->operands
[0]->variable_referenced();
550 if (!var
|| !var
->is_in_shader_storage_block())
552 return process_ssbo_unsized_array_length(&rvalue
, deref
, var
);
556 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression
*ir
)
559 ir_expression_operation(ir_unop_ssbo_unsized_array_length
)) {
560 /* Don't replace this unop if it is found alone. It is going to be
561 * removed by the optimization passes or replaced if it is part of
562 * an ir_assignment or another ir_expression.
567 for (unsigned i
= 0; i
< ir
->get_num_operands(); i
++) {
568 if (ir
->operands
[i
]->ir_type
!= ir_type_expression
)
570 ir_expression
*expr
= (ir_expression
*) ir
->operands
[i
];
571 ir_expression
*temp
= calculate_ssbo_unsized_array_length(expr
);
576 ir
->operands
[i
] = temp
;
581 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment
*ir
)
583 if (!ir
->rhs
|| ir
->rhs
->ir_type
!= ir_type_expression
)
586 ir_expression
*expr
= (ir_expression
*) ir
->rhs
;
587 ir_expression
*temp
= calculate_ssbo_unsized_array_length(expr
);
597 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx
)
599 ir_rvalue
*block_ref
= this->uniform_block
->clone(mem_ctx
, NULL
);
600 return new(mem_ctx
) ir_expression(ir_unop_get_buffer_size
,
606 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference
*deref
,
609 unsigned array_stride
= 0;
611 switch (deref
->ir_type
) {
612 case ir_type_dereference_variable
:
614 ir_dereference_variable
*deref_var
= (ir_dereference_variable
*)deref
;
615 const struct glsl_type
*unsized_array_type
= NULL
;
616 /* An unsized array can be sized by other lowering passes, so pick
617 * the first field of the array which has the data type of the unsized
620 unsized_array_type
= deref_var
->var
->type
->fields
.array
;
622 /* Whether or not the field is row-major (because it might be a
623 * bvec2 or something) does not affect the array itself. We need
624 * to know whether an array element in its entirety is row-major.
626 const bool array_row_major
=
627 is_dereferenced_thing_row_major(deref_var
);
629 if (packing
== GLSL_INTERFACE_PACKING_STD430
) {
630 array_stride
= unsized_array_type
->std430_array_stride(array_row_major
);
632 array_stride
= unsized_array_type
->std140_size(array_row_major
);
633 array_stride
= glsl_align(array_stride
, 16);
637 case ir_type_dereference_record
:
639 ir_dereference_record
*deref_record
= (ir_dereference_record
*) deref
;
640 ir_dereference
*interface_deref
=
641 deref_record
->record
->as_dereference();
642 assert(interface_deref
!= NULL
);
643 const struct glsl_type
*interface_type
= interface_deref
->type
;
644 unsigned record_length
= interface_type
->length
;
645 /* Unsized array is always the last element of the interface */
646 const struct glsl_type
*unsized_array_type
=
647 interface_type
->fields
.structure
[record_length
- 1].type
->fields
.array
;
649 const bool array_row_major
=
650 is_dereferenced_thing_row_major(deref_record
);
652 if (packing
== GLSL_INTERFACE_PACKING_STD430
) {
653 array_stride
= unsized_array_type
->std430_array_stride(array_row_major
);
655 array_stride
= unsized_array_type
->std140_size(array_row_major
);
656 array_stride
= glsl_align(array_stride
, 16);
661 unreachable("Unsupported dereference type");
667 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue
**rvalue
,
668 ir_dereference
*deref
,
671 void *mem_ctx
= ralloc_parent(*rvalue
);
673 ir_rvalue
*base_offset
= NULL
;
674 unsigned const_offset
;
677 unsigned packing
= var
->get_interface_type()->interface_packing
;
678 int unsized_array_stride
= calculate_unsized_array_stride(deref
, packing
);
680 this->buffer_access_type
= ssbo_unsized_array_length_access
;
682 /* Compute the offset to the start of the dereference as well as other
683 * information we need to calculate the length.
685 setup_for_load_or_store(mem_ctx
, var
, deref
,
686 &base_offset
, &const_offset
,
687 &row_major
, &matrix_columns
,
690 * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
692 ir_expression
*buffer_size
= emit_ssbo_get_buffer_size(mem_ctx
);
694 ir_expression
*offset_of_array
= new(mem_ctx
)
695 ir_expression(ir_binop_add
, base_offset
,
696 new(mem_ctx
) ir_constant(const_offset
));
697 ir_expression
*offset_of_array_int
= new(mem_ctx
)
698 ir_expression(ir_unop_u2i
, offset_of_array
);
700 ir_expression
*sub
= new(mem_ctx
)
701 ir_expression(ir_binop_sub
, buffer_size
, offset_of_array_int
);
702 ir_expression
*div
= new(mem_ctx
)
703 ir_expression(ir_binop_div
, sub
,
704 new(mem_ctx
) ir_constant(unsized_array_stride
));
705 ir_expression
*max
= new(mem_ctx
)
706 ir_expression(ir_binop_max
, div
, new(mem_ctx
) ir_constant(0));
712 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment
*ir
)
717 ir_rvalue
*rvalue
= ir
->lhs
->as_rvalue();
721 ir_dereference
*deref
= ir
->lhs
->as_dereference();
725 ir_variable
*var
= ir
->lhs
->variable_referenced();
726 if (!var
|| !var
->is_in_shader_storage_block())
729 /* We have a write to a buffer variable, so declare a temporary and rewrite
730 * the assignment so that the temporary is the LHS.
732 void *mem_ctx
= ralloc_parent(shader
->ir
);
734 const glsl_type
*type
= rvalue
->type
;
735 ir_variable
*write_var
= new(mem_ctx
) ir_variable(type
,
738 base_ir
->insert_before(write_var
);
739 ir
->lhs
= new(mem_ctx
) ir_dereference_variable(write_var
);
741 /* Now we have to write the value assigned to the temporary back to memory */
742 write_to_memory(mem_ctx
, deref
, var
, write_var
, ir
->write_mask
);
747 is_buffer_backed_variable(ir_variable
*var
)
749 return var
->is_in_buffer_block() ||
750 var
->data
.mode
== ir_var_shader_shared
;
754 lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment
*ir
)
756 if (!ir
|| !ir
->lhs
|| !ir
->rhs
)
759 /* LHS and RHS must be arrays
760 * FIXME: arrays of arrays?
762 if (!ir
->lhs
->type
->is_array() || !ir
->rhs
->type
->is_array())
765 /* RHS must be a buffer-backed variable. This is what can cause the problem
766 * since it would lead to a series of loads that need to live until we
767 * see the writes to the LHS.
769 ir_variable
*rhs_var
= ir
->rhs
->variable_referenced();
770 if (!rhs_var
|| !is_buffer_backed_variable(rhs_var
))
773 /* Split the array copy into individual element copies to reduce
776 ir_dereference
*rhs_deref
= ir
->rhs
->as_dereference();
780 ir_dereference
*lhs_deref
= ir
->lhs
->as_dereference();
784 assert(lhs_deref
->type
->length
== rhs_deref
->type
->length
);
785 void *mem_ctx
= ralloc_parent(shader
->ir
);
787 for (unsigned i
= 0; i
< lhs_deref
->type
->length
; i
++) {
788 ir_dereference
*lhs_i
=
789 new(mem_ctx
) ir_dereference_array(lhs_deref
->clone(mem_ctx
, NULL
),
790 new(mem_ctx
) ir_constant(i
));
792 ir_dereference
*rhs_i
=
793 new(mem_ctx
) ir_dereference_array(rhs_deref
->clone(mem_ctx
, NULL
),
794 new(mem_ctx
) ir_constant(i
));
795 ir
->insert_after(assign(lhs_i
, rhs_i
));
804 lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment
*ir
)
806 if (!ir
|| !ir
->lhs
|| !ir
->rhs
)
809 /* LHS and RHS must be records */
810 if (!ir
->lhs
->type
->is_record() || !ir
->rhs
->type
->is_record())
813 /* RHS must be a buffer-backed variable. This is what can cause the problem
814 * since it would lead to a series of loads that need to live until we
815 * see the writes to the LHS.
817 ir_variable
*rhs_var
= ir
->rhs
->variable_referenced();
818 if (!rhs_var
|| !is_buffer_backed_variable(rhs_var
))
821 /* Split the struct copy into individual element copies to reduce
824 ir_dereference
*rhs_deref
= ir
->rhs
->as_dereference();
828 ir_dereference
*lhs_deref
= ir
->lhs
->as_dereference();
832 assert(lhs_deref
->type
->record_compare(rhs_deref
->type
));
833 void *mem_ctx
= ralloc_parent(shader
->ir
);
835 for (unsigned i
= 0; i
< lhs_deref
->type
->length
; i
++) {
836 const char *field_name
= lhs_deref
->type
->fields
.structure
[i
].name
;
837 ir_dereference
*lhs_field
=
838 new(mem_ctx
) ir_dereference_record(lhs_deref
->clone(mem_ctx
, NULL
),
840 ir_dereference
*rhs_field
=
841 new(mem_ctx
) ir_dereference_record(rhs_deref
->clone(mem_ctx
, NULL
),
843 ir
->insert_after(assign(lhs_field
, rhs_field
));
852 lower_ubo_reference_visitor::visit_enter(ir_assignment
*ir
)
854 /* Array and struct copies could involve large amounts of load/store
855 * operations. To reduce register pressure we want to special-case
856 * these and split them into individual element copies.
857 * This way we avoid emitting all the loads for the RHS first and
858 * all the writes for the LHS second and register usage is more
861 if (check_for_buffer_array_copy(ir
))
862 return visit_continue_with_parent
;
864 if (check_for_buffer_struct_copy(ir
))
865 return visit_continue_with_parent
;
867 check_ssbo_unsized_array_length_assignment(ir
);
868 check_for_ssbo_store(ir
);
869 return rvalue_visit(ir
);
872 /* Lowers the intrinsic call to a new internal intrinsic that replaces the
873 * access to the buffer variable in the first parameter with an offset
874 * and block index. This involves creating the new internal intrinsic
875 * (i.e. the new function signature).
878 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call
*ir
)
880 /* SSBO atomics usually have 2 parameters, the buffer variable and an
881 * integer argument. The exception is CompSwap, which has an additional
884 int param_count
= ir
->actual_parameters
.length();
885 assert(param_count
== 2 || param_count
== 3);
887 /* First argument must be a scalar integer buffer variable */
888 exec_node
*param
= ir
->actual_parameters
.get_head();
889 ir_instruction
*inst
= (ir_instruction
*) param
;
890 assert(inst
->ir_type
== ir_type_dereference_variable
||
891 inst
->ir_type
== ir_type_dereference_array
||
892 inst
->ir_type
== ir_type_dereference_record
||
893 inst
->ir_type
== ir_type_swizzle
);
895 ir_rvalue
*deref
= (ir_rvalue
*) inst
;
896 assert(deref
->type
->is_scalar() && deref
->type
->is_integer());
898 ir_variable
*var
= deref
->variable_referenced();
901 /* Compute the offset to the start of the dereference and the
904 void *mem_ctx
= ralloc_parent(shader
->ir
);
906 ir_rvalue
*offset
= NULL
;
907 unsigned const_offset
;
910 unsigned packing
= var
->get_interface_type()->interface_packing
;
912 this->buffer_access_type
= ssbo_atomic_access
;
914 setup_for_load_or_store(mem_ctx
, var
, deref
,
915 &offset
, &const_offset
,
916 &row_major
, &matrix_columns
,
920 assert(matrix_columns
== 1);
922 ir_rvalue
*deref_offset
=
923 add(offset
, new(mem_ctx
) ir_constant(const_offset
));
924 ir_rvalue
*block_index
= this->uniform_block
->clone(mem_ctx
, NULL
);
926 /* Create the new internal function signature that will take a block
927 * index and offset instead of a buffer variable
929 exec_list sig_params
;
930 ir_variable
*sig_param
= new(mem_ctx
)
931 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
932 sig_params
.push_tail(sig_param
);
934 sig_param
= new(mem_ctx
)
935 ir_variable(glsl_type::uint_type
, "offset" , ir_var_function_in
);
936 sig_params
.push_tail(sig_param
);
938 const glsl_type
*type
= deref
->type
->base_type
== GLSL_TYPE_INT
?
939 glsl_type::int_type
: glsl_type::uint_type
;
940 sig_param
= new(mem_ctx
)
941 ir_variable(type
, "data1", ir_var_function_in
);
942 sig_params
.push_tail(sig_param
);
944 if (param_count
== 3) {
945 sig_param
= new(mem_ctx
)
946 ir_variable(type
, "data2", ir_var_function_in
);
947 sig_params
.push_tail(sig_param
);
950 ir_function_signature
*sig
=
951 new(mem_ctx
) ir_function_signature(deref
->type
,
952 shader_storage_buffer_object
);
954 sig
->replace_parameters(&sig_params
);
955 sig
->is_intrinsic
= true;
958 sprintf(func_name
, "%s_ssbo", ir
->callee_name());
959 ir_function
*f
= new(mem_ctx
) ir_function(func_name
);
960 f
->add_signature(sig
);
962 /* Now, create the call to the internal intrinsic */
963 exec_list call_params
;
964 call_params
.push_tail(block_index
);
965 call_params
.push_tail(deref_offset
);
966 param
= ir
->actual_parameters
.get_head()->get_next();
967 ir_rvalue
*param_as_rvalue
= ((ir_instruction
*) param
)->as_rvalue();
968 call_params
.push_tail(param_as_rvalue
->clone(mem_ctx
, NULL
));
969 if (param_count
== 3) {
970 param
= param
->get_next();
971 param_as_rvalue
= ((ir_instruction
*) param
)->as_rvalue();
972 call_params
.push_tail(param_as_rvalue
->clone(mem_ctx
, NULL
));
974 ir_dereference_variable
*return_deref
=
975 ir
->return_deref
->clone(mem_ctx
, NULL
);
976 return new(mem_ctx
) ir_call(sig
, return_deref
, &call_params
);
980 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call
*ir
)
982 exec_list
& params
= ir
->actual_parameters
;
984 if (params
.length() < 2 || params
.length() > 3)
988 ((ir_instruction
*) params
.get_head())->as_rvalue();
992 ir_variable
*var
= rvalue
->variable_referenced();
993 if (!var
|| !var
->is_in_shader_storage_block())
996 const char *callee
= ir
->callee_name();
997 if (!strcmp("__intrinsic_atomic_add", callee
) ||
998 !strcmp("__intrinsic_atomic_min", callee
) ||
999 !strcmp("__intrinsic_atomic_max", callee
) ||
1000 !strcmp("__intrinsic_atomic_and", callee
) ||
1001 !strcmp("__intrinsic_atomic_or", callee
) ||
1002 !strcmp("__intrinsic_atomic_xor", callee
) ||
1003 !strcmp("__intrinsic_atomic_exchange", callee
) ||
1004 !strcmp("__intrinsic_atomic_comp_swap", callee
)) {
1005 return lower_ssbo_atomic_intrinsic(ir
);
1013 lower_ubo_reference_visitor::visit_enter(ir_call
*ir
)
1015 ir_call
*new_ir
= check_for_ssbo_atomic_intrinsic(ir
);
1018 base_ir
->replace_with(new_ir
);
1019 return visit_continue_with_parent
;
1022 return rvalue_visit(ir
);
1026 } /* unnamed namespace */
1029 lower_ubo_reference(struct gl_shader
*shader
)
1031 lower_ubo_reference_visitor
v(shader
);
1033 /* Loop over the instructions lowering references, because taking
1034 * a deref of a UBO array using a UBO dereference as the index will
1035 * produce a collection of instructions all of which have cloned
1036 * UBO dereferences for that array index.
1040 visit_list_elements(&v
, shader
->ir
);
1041 } while (v
.progress
);