2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file lower_ubo_reference.cpp
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40 #include "main/mtypes.h"
42 using namespace ir_builder
;
/**
 * Visitor that lowers dereferences of buffer-block variables.
 *
 * Derives from lower_buffer_access::lower_buffer_access.  The constructor
 * stores \c shader and \c clamp_block_indices, initialises \c struct_field
 * and \c variable to NULL, and records \c use_std430_as_default.
 *
 * Per-access state kept in the object:
 *  - \c shader              linked shader being lowered
 *  - \c clamp_block_indices whether non-constant block indices are clamped
 *  - \c struct_field        filled in through setup_buffer_access()
 *  - \c variable            buffer variable currently being accessed
 *  - \c uniform_block       block-index rvalue for the current access
 *
 * NOTE(review): this listing is a lossy dump; access specifiers and some
 * declaration lines are not visible here.
 */
45 class lower_ubo_reference_visitor
:
46 public lower_buffer_access::lower_buffer_access
{
48 lower_ubo_reference_visitor(struct gl_linked_shader
*shader
,
49 bool clamp_block_indices
,
50 bool use_std430_as_default
)
51 : shader(shader
), clamp_block_indices(clamp_block_indices
),
52 struct_field(NULL
), variable(NULL
)
54 this->use_std430_as_default
= use_std430_as_default
;
57 void handle_rvalue(ir_rvalue
**rvalue
);
58 ir_visitor_status
visit_enter(ir_assignment
*ir
);
60 void setup_for_load_or_store(void *mem_ctx
,
64 unsigned *const_offset
,
66 const glsl_type
**matrix_type
,
67 enum glsl_interface_packing packing
);
68 uint32_t ssbo_access_params();
69 ir_expression
*ubo_load(void *mem_ctx
, const struct glsl_type
*type
,
71 ir_call
*ssbo_load(void *mem_ctx
, const struct glsl_type
*type
,
74 bool check_for_buffer_array_copy(ir_assignment
*ir
);
75 bool check_for_buffer_struct_copy(ir_assignment
*ir
);
76 void check_for_ssbo_store(ir_assignment
*ir
);
77 void write_to_memory(void *mem_ctx
, ir_dereference
*deref
, ir_variable
*var
,
78 ir_variable
*write_var
, unsigned write_mask
);
79 ir_call
*ssbo_store(void *mem_ctx
, ir_rvalue
*deref
, ir_rvalue
*offset
,
86 ssbo_unsized_array_length_access
,
90 void insert_buffer_access(void *mem_ctx
, ir_dereference
*deref
,
91 const glsl_type
*type
, ir_rvalue
*offset
,
92 unsigned mask
, int channel
);
94 ir_visitor_status
visit_enter(class ir_expression
*);
95 ir_expression
*calculate_ssbo_unsized_array_length(ir_expression
*expr
);
96 void check_ssbo_unsized_array_length_expression(class ir_expression
*);
97 void check_ssbo_unsized_array_length_assignment(ir_assignment
*ir
);
99 ir_expression
*process_ssbo_unsized_array_length(ir_rvalue
**,
102 ir_expression
*emit_ssbo_get_buffer_size(void *mem_ctx
);
104 unsigned calculate_unsized_array_stride(ir_dereference
*deref
,
105 enum glsl_interface_packing packing
);
107 ir_call
*lower_ssbo_atomic_intrinsic(ir_call
*ir
);
108 ir_call
*check_for_ssbo_atomic_intrinsic(ir_call
*ir
);
109 ir_visitor_status
visit_enter(ir_call
*ir
);
110 ir_visitor_status
visit_enter(ir_texture
*ir
);
112 struct gl_linked_shader
*shader
;
113 bool clamp_block_indices
;
114 const struct glsl_struct_field
*struct_field
;
115 ir_variable
*variable
;
116 ir_rvalue
*uniform_block
;
/**
 * Builds the block name used to look a block up, and collects any
 * non-constant block array index.
 *
 * \param mem_ctx              ralloc context for the name copy and new IR
 * \param base_name            interface type name; duplicated with
 *                             ralloc_strdup() the first time an array
 *                             dereference is seen
 * \param d                    dereference chain to walk
 * \param nonconst_block_index out: set to NULL on entry; array dereferences
 *                             accumulate their index expression into it
 *
 * Two passes are visible.  The first steps \c ir back through record,
 * array and swizzle dereferences.  The second walks \c d: for each array
 * dereference the index is converted with i2u() when it is not uint, is
 * scaled by arrays_of_arrays_size() of the element type for arrays of
 * arrays, and is added into \c *nonconst_block_index, while a subscript is
 * spliced into the name at \c base_length ("[0]%s" or "[%d]%s").
 *
 * NOTE(review): the branch conditions that choose between the "[0]" and
 * "[%d]" forms, and the return statements, are not visible in this dump;
 * presumably constant indices take the "[%d]" form - confirm.
 */
121 * Determine the name of the interface block field
123 * This is the name of the specific member as it would appear in the
124 * \c gl_uniform_buffer_variable::Name field in the shader's
125 * \c UniformBlocks array.
128 interface_field_name(void *mem_ctx
, char *base_name
, ir_rvalue
*d
,
129 ir_rvalue
**nonconst_block_index
)
131 *nonconst_block_index
= NULL
;
132 char *name_copy
= NULL
;
133 size_t base_length
= 0;
135 /* Loop back through the IR until we find the uniform block */
138 switch (ir
->ir_type
) {
139 case ir_type_dereference_variable
: {
145 case ir_type_dereference_record
: {
146 ir_dereference_record
*r
= (ir_dereference_record
*) ir
;
147 ir
= r
->record
->as_dereference();
149 /* If we got here it means any previous array subscripts belong to
150 * block members and not the block itself so skip over them in the
157 case ir_type_dereference_array
: {
158 ir_dereference_array
*a
= (ir_dereference_array
*) ir
;
159 ir
= a
->array
->as_dereference();
163 case ir_type_swizzle
: {
164 ir_swizzle
*s
= (ir_swizzle
*) ir
;
165 ir
= s
->val
->as_dereference();
166 /* Skip swizzle in the next pass */
172 assert(!"Should not get here.");
178 switch (d
->ir_type
) {
179 case ir_type_dereference_variable
: {
180 ir_dereference_variable
*v
= (ir_dereference_variable
*) d
;
181 if (name_copy
!= NULL
&&
182 v
->var
->is_interface_instance() &&
183 v
->var
->type
->is_array()) {
186 *nonconst_block_index
= NULL
;
193 case ir_type_dereference_array
: {
194 ir_dereference_array
*a
= (ir_dereference_array
*) d
;
197 if (name_copy
== NULL
) {
198 name_copy
= ralloc_strdup(mem_ctx
, base_name
);
199 base_length
= strlen(name_copy
);
202 /* For arrays of arrays we start at the innermost array and work our
203 * way out so we need to insert the subscript at the base of the
204 * name string rather than just attaching it to the end.
206 new_length
= base_length
;
207 ir_constant
*const_index
= a
->array_index
->as_constant();
208 char *end
= ralloc_strdup(NULL
, &name_copy
[new_length
]);
210 ir_rvalue
*array_index
= a
->array_index
;
211 if (array_index
->type
!= glsl_type::uint_type
)
212 array_index
= i2u(array_index
);
214 if (a
->array
->type
->is_array() &&
215 a
->array
->type
->fields
.array
->is_array()) {
216 ir_constant
*base_size
= new(mem_ctx
)
217 ir_constant(a
->array
->type
->fields
.array
->arrays_of_arrays_size());
218 array_index
= mul(array_index
, base_size
);
221 if (*nonconst_block_index
) {
222 *nonconst_block_index
= add(*nonconst_block_index
, array_index
);
224 *nonconst_block_index
= array_index
;
227 ralloc_asprintf_rewrite_tail(&name_copy
, &new_length
, "[0]%s",
230 ralloc_asprintf_rewrite_tail(&name_copy
, &new_length
, "[%d]%s",
231 const_index
->get_uint_component(0),
236 d
= a
->array
->as_dereference();
242 assert(!"Should not get here.");
247 assert(!"Should not get here.");
/**
 * Clamps a block array index to the valid range of \p type.
 *
 * \param mem_ctx ralloc context for the new constants
 * \param index   index expression to clamp
 * \param type    array type; asserted to be an array
 *
 * The upper bound is arrays_of_arrays_size() - 1.  Both bound constants are
 * retyped to \c index->type.  A lower clamp against zero (max2) is applied
 * only when the index base type is GLSL_TYPE_INT; the upper clamp (min2) is
 * always applied.
 *
 * NOTE(review): the return statement is not visible in this dump;
 * presumably the clamped \c index is returned.
 */
252 clamp_to_array_bounds(void *mem_ctx
, ir_rvalue
*index
, const glsl_type
*type
)
254 assert(type
->is_array());
256 const unsigned array_size
= type
->arrays_of_arrays_size();
258 ir_constant
*max_index
= new(mem_ctx
) ir_constant(array_size
- 1);
259 max_index
->type
= index
->type
;
261 ir_constant
*zero
= new(mem_ctx
) ir_constant(0);
262 zero
->type
= index
->type
;
264 if (index
->type
->base_type
== GLSL_TYPE_INT
)
265 index
= max2(index
, zero
);
266 index
= min2(index
, max_index
);
/**
 * Resolves which block a dereference targets and prepares offset data.
 *
 * Steps visible in the body:
 *  1. Build the block name with interface_field_name() from the variable's
 *     interface type name, collecting any non-constant block index.
 *  2. When \c clamp_block_indices is set, clamp that index with
 *     clamp_to_array_bounds() against \c var->type.
 *  3. Pick the block list: shader storage blocks when
 *     \c buffer_access_type is not \c ubo_load_access, uniform blocks
 *     otherwise.
 *  4. Find the block whose Name matches (strcmp) and set
 *     \c this->uniform_block to its index constant, added to the
 *     non-constant index when there is one.
 *  5. Reset \c struct_field and delegate to setup_buffer_access().
 *
 * \c *const_offset is written from
 * \c blocks[i]->Uniforms[var->data.location].Offset.
 *
 * NOTE(review): the exact branch structure around
 * \c var->is_interface_instance() and several parameter lines are not
 * visible in this dump - confirm against the full source.
 */
272 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx
,
276 unsigned *const_offset
,
278 const glsl_type
**matrix_type
,
279 enum glsl_interface_packing packing
)
281 /* Determine the name of the interface block */
282 ir_rvalue
*nonconst_block_index
;
283 const char *const field_name
=
284 interface_field_name(mem_ctx
, (char *) var
->get_interface_type()->name
,
285 deref
, &nonconst_block_index
);
287 if (nonconst_block_index
&& clamp_block_indices
) {
288 nonconst_block_index
=
289 clamp_to_array_bounds(mem_ctx
, nonconst_block_index
, var
->type
);
292 /* Locate the block by interface name */
294 struct gl_uniform_block
**blocks
;
295 if (this->buffer_access_type
!= ubo_load_access
) {
296 num_blocks
= shader
->Program
->info
.num_ssbos
;
297 blocks
= shader
->Program
->sh
.ShaderStorageBlocks
;
299 num_blocks
= shader
->Program
->info
.num_ubos
;
300 blocks
= shader
->Program
->sh
.UniformBlocks
;
302 this->uniform_block
= NULL
;
303 for (unsigned i
= 0; i
< num_blocks
; i
++) {
304 if (strcmp(field_name
, blocks
[i
]->Name
) == 0) {
306 ir_constant
*index
= new(mem_ctx
) ir_constant(i
);
308 if (nonconst_block_index
) {
309 this->uniform_block
= add(nonconst_block_index
, index
);
311 this->uniform_block
= index
;
314 if (var
->is_interface_instance()) {
317 *const_offset
= blocks
[i
]->Uniforms
[var
->data
.location
].Offset
;
324 assert(this->uniform_block
);
326 this->struct_field
= NULL
;
327 setup_buffer_access(mem_ctx
, deref
, offset
, const_offset
, row_major
,
328 matrix_type
, &this->struct_field
, packing
);
/**
 * Lowers a read of a buffer-block variable into explicit loads.
 *
 * The rvalue is considered only when it is a dereference whose referenced
 * variable is in a buffer block.  The body then:
 *  - takes the packing from the variable's interface type,
 *  - sets \c buffer_access_type to \c ssbo_load_access for shader storage
 *    blocks and \c ubo_load_access otherwise,
 *  - calls setup_for_load_or_store() to obtain the offsets,
 *  - declares a temporary of the rvalue's type and a uint temporary named
 *    "ubo_load_temp_offset", both inserted before \c base_ir,
 *  - calls emit_access() with is_write = false and write mask 0 so the
 *    loads land in the temporary.
 *
 * NOTE(review): the early-return statements and whatever follows
 * emit_access() (presumably replacing \c *rvalue with the temporary) are
 * not visible in this dump.
 */
332 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue
**rvalue
)
337 ir_dereference
*deref
= (*rvalue
)->as_dereference();
341 ir_variable
*var
= deref
->variable_referenced();
342 if (!var
|| !var
->is_in_buffer_block())
345 void *mem_ctx
= ralloc_parent(shader
->ir
);
347 ir_rvalue
*offset
= NULL
;
348 unsigned const_offset
;
350 const glsl_type
*matrix_type
;
352 enum glsl_interface_packing packing
=
353 var
->get_interface_type()->
354 get_internal_ifc_packing(use_std430_as_default
);
356 this->buffer_access_type
=
357 var
->is_in_shader_storage_block() ?
358 ssbo_load_access
: ubo_load_access
;
359 this->variable
= var
;
361 /* Compute the offset to the start if the dereference as well as other
362 * information we need to configure the write
364 setup_for_load_or_store(mem_ctx
, var
, deref
,
365 &offset
, &const_offset
,
366 &row_major
, &matrix_type
,
370 /* Now that we've calculated the offset to the start of the
371 * dereference, walk over the type and emit loads into a temporary.
373 const glsl_type
*type
= (*rvalue
)->type
;
374 ir_variable
*load_var
= new(mem_ctx
) ir_variable(type
,
377 base_ir
->insert_before(load_var
);
379 ir_variable
*load_offset
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
380 "ubo_load_temp_offset",
382 base_ir
->insert_before(load_offset
);
383 base_ir
->insert_before(assign(load_offset
, offset
));
385 deref
= new(mem_ctx
) ir_dereference_variable(load_var
);
386 emit_access(mem_ctx
, false, deref
, load_offset
, const_offset
,
387 row_major
, matrix_type
, packing
, 0);
/**
 * Builds an \c ir_binop_ubo_load expression for the current block.
 *
 * The block reference is a clone of \c this->uniform_block.
 *
 * NOTE(review): the remaining constructor arguments and the return
 * statement are not visible in this dump.
 */
394 lower_ubo_reference_visitor::ubo_load(void *mem_ctx
,
395 const glsl_type
*type
,
398 ir_rvalue
*block_ref
= this->uniform_block
->clone(mem_ctx
, NULL
);
400 ir_expression(ir_binop_ubo_load
,
/**
 * Availability predicate: returns
 * \c state->has_shader_storage_buffer_objects().
 *
 * It is passed to the ir_function_signature constructors for the SSBO
 * intrinsics built elsewhere in this file.
 */
408 shader_storage_buffer_object(const _mesa_glsl_parse_state
*state
)
410 return state
->has_shader_storage_buffer_objects();
/**
 * Builds the access-qualifier bitmask for the current SSBO access.
 *
 * The result ORs together ACCESS_COHERENT, ACCESS_RESTRICT and
 * ACCESS_VOLATILE according to the memory_coherent / memory_restrict /
 * memory_volatile flags.  When \c variable is an interface instance the
 * flags are read from \c struct_field (asserted non-NULL); the second
 * return reads them from \c variable->data.
 *
 * NOTE(review): the else/brace lines between the two returns are not
 * visible in this dump.
 */
414 lower_ubo_reference_visitor::ssbo_access_params()
418 if (variable
->is_interface_instance()) {
419 assert(struct_field
);
421 return ((struct_field
->memory_coherent
? ACCESS_COHERENT
: 0) |
422 (struct_field
->memory_restrict
? ACCESS_RESTRICT
: 0) |
423 (struct_field
->memory_volatile
? ACCESS_VOLATILE
: 0));
425 return ((variable
->data
.memory_coherent
? ACCESS_COHERENT
: 0) |
426 (variable
->data
.memory_restrict
? ACCESS_RESTRICT
: 0) |
427 (variable
->data
.memory_volatile
? ACCESS_VOLATILE
: 0));
/**
 * Builds a call to the internal SSBO store intrinsic.
 *
 * A void signature is created with five \c ir_var_function_in parameters:
 * "block_ref" (uint), "offset" (uint), "value" (typed as \c deref->type),
 * "write_mask" (uint) and "access" (uint).  Its intrinsic id is
 * \c ir_intrinsic_ssbo_store and it is attached to a function named
 * "__intrinsic_store_ssbo".
 *
 * The actual arguments are clones of \c this->uniform_block, \c offset and
 * \c deref, followed by constants for \c write_mask and
 * ssbo_access_params().
 *
 * \return a new ir_call with no return dereference (NULL).
 */
432 lower_ubo_reference_visitor::ssbo_store(void *mem_ctx
,
437 exec_list sig_params
;
439 ir_variable
*block_ref
= new(mem_ctx
)
440 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
441 sig_params
.push_tail(block_ref
);
443 ir_variable
*offset_ref
= new(mem_ctx
)
444 ir_variable(glsl_type::uint_type
, "offset" , ir_var_function_in
);
445 sig_params
.push_tail(offset_ref
);
447 ir_variable
*val_ref
= new(mem_ctx
)
448 ir_variable(deref
->type
, "value" , ir_var_function_in
);
449 sig_params
.push_tail(val_ref
);
451 ir_variable
*writemask_ref
= new(mem_ctx
)
452 ir_variable(glsl_type::uint_type
, "write_mask" , ir_var_function_in
);
453 sig_params
.push_tail(writemask_ref
);
455 ir_variable
*access_ref
= new(mem_ctx
)
456 ir_variable(glsl_type::uint_type
, "access" , ir_var_function_in
);
457 sig_params
.push_tail(access_ref
);
459 ir_function_signature
*sig
= new(mem_ctx
)
460 ir_function_signature(glsl_type::void_type
, shader_storage_buffer_object
);
462 sig
->replace_parameters(&sig_params
);
463 sig
->intrinsic_id
= ir_intrinsic_ssbo_store
;
465 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_store_ssbo");
466 f
->add_signature(sig
);
468 exec_list call_params
;
469 call_params
.push_tail(this->uniform_block
->clone(mem_ctx
, NULL
));
470 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
471 call_params
.push_tail(deref
->clone(mem_ctx
, NULL
));
472 call_params
.push_tail(new(mem_ctx
) ir_constant(write_mask
));
473 call_params
.push_tail(new(mem_ctx
) ir_constant(ssbo_access_params()));
474 return new(mem_ctx
) ir_call(sig
, NULL
, &call_params
);
/**
 * Builds a call to the internal SSBO load intrinsic.
 *
 * A signature returning \c type is created with three
 * \c ir_var_function_in uint parameters: "block_ref", "offset_ref" and
 * "access".  Its intrinsic id is \c ir_intrinsic_ssbo_load and it is
 * attached to a function named "__intrinsic_load_ssbo".
 *
 * A temporary named "ssbo_load_result" of \c type is inserted before
 * \c base_ir to receive the value.  The actual arguments are clones of
 * \c this->uniform_block and \c offset plus a constant holding
 * ssbo_access_params().
 *
 * \return a new ir_call whose return dereference is the temporary.
 */
478 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx
,
479 const struct glsl_type
*type
,
482 exec_list sig_params
;
484 ir_variable
*block_ref
= new(mem_ctx
)
485 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
486 sig_params
.push_tail(block_ref
);
488 ir_variable
*offset_ref
= new(mem_ctx
)
489 ir_variable(glsl_type::uint_type
, "offset_ref" , ir_var_function_in
);
490 sig_params
.push_tail(offset_ref
);
492 ir_variable
*access_ref
= new(mem_ctx
)
493 ir_variable(glsl_type::uint_type
, "access" , ir_var_function_in
);
494 sig_params
.push_tail(access_ref
);
496 ir_function_signature
*sig
=
497 new(mem_ctx
) ir_function_signature(type
, shader_storage_buffer_object
);
499 sig
->replace_parameters(&sig_params
);
500 sig
->intrinsic_id
= ir_intrinsic_ssbo_load
;
502 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_load_ssbo");
503 f
->add_signature(sig
);
505 ir_variable
*result
= new(mem_ctx
)
506 ir_variable(type
, "ssbo_load_result", ir_var_temporary
);
507 base_ir
->insert_before(result
);
508 ir_dereference_variable
*deref_result
= new(mem_ctx
)
509 ir_dereference_variable(result
);
511 exec_list call_params
;
512 call_params
.push_tail(this->uniform_block
->clone(mem_ctx
, NULL
));
513 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
514 call_params
.push_tail(new(mem_ctx
) ir_constant(ssbo_access_params()));
516 return new(mem_ctx
) ir_call(sig
, deref_result
, &call_params
);
/**
 * Emits one load or store for \c deref, chosen by \c buffer_access_type.
 *
 *  - \c ubo_load_access:   an assignment of ubo_load() to a clone of
 *                          \c deref is inserted before \c base_ir.
 *  - \c ssbo_load_access:  the ssbo_load() call is inserted before
 *                          \c base_ir, then its return value is assigned
 *                          to a clone of \c deref using \c mask.
 *  - \c ssbo_store_access: an ssbo_store() call is inserted after
 *                          \c base_ir, either for a one-component swizzle
 *                          of \c deref at \c channel or for \c deref
 *                          itself.
 *  - anything else:        unreachable().
 *
 * NOTE(review): the condition choosing between the swizzled and whole
 * store is not visible in this dump; presumably it tests \c channel.
 */
520 lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx
,
521 ir_dereference
*deref
,
522 const glsl_type
*type
,
527 switch (this->buffer_access_type
) {
528 case ubo_load_access
:
529 base_ir
->insert_before(assign(deref
->clone(mem_ctx
, NULL
),
530 ubo_load(mem_ctx
, type
, offset
),
533 case ssbo_load_access
: {
534 ir_call
*load_ssbo
= ssbo_load(mem_ctx
, type
, offset
);
535 base_ir
->insert_before(load_ssbo
);
536 ir_rvalue
*value
= load_ssbo
->return_deref
->as_rvalue()->clone(mem_ctx
, NULL
);
537 ir_assignment
*assignment
=
538 assign(deref
->clone(mem_ctx
, NULL
), value
, mask
);
539 base_ir
->insert_before(assignment
);
542 case ssbo_store_access
:
544 base_ir
->insert_after(ssbo_store(mem_ctx
,
545 swizzle(deref
, channel
, 1),
548 base_ir
->insert_after(ssbo_store(mem_ctx
, deref
, offset
, mask
));
552 unreachable("invalid buffer_access_type in insert_buffer_access");
/**
 * Emits the stores that copy \c write_var back into the buffer variable.
 *
 * The body takes the packing from the variable's interface type, sets
 * \c buffer_access_type to \c ssbo_store_access, and calls
 * setup_for_load_or_store() to obtain the offsets.  A uint temporary named
 * "ssbo_store_temp_offset" is inserted before \c base_ir and assigned the
 * computed offset.  emit_access() is then called with is_write = true on a
 * dereference of \c write_var, passing \c write_mask through.
 */
557 lower_ubo_reference_visitor::write_to_memory(void *mem_ctx
,
558 ir_dereference
*deref
,
560 ir_variable
*write_var
,
563 ir_rvalue
*offset
= NULL
;
564 unsigned const_offset
;
566 const glsl_type
*matrix_type
;
568 enum glsl_interface_packing packing
=
569 var
->get_interface_type()->
570 get_internal_ifc_packing(use_std430_as_default
);
572 this->buffer_access_type
= ssbo_store_access
;
573 this->variable
= var
;
575 /* Compute the offset to the start if the dereference as well as other
576 * information we need to configure the write
578 setup_for_load_or_store(mem_ctx
, var
, deref
,
579 &offset
, &const_offset
,
580 &row_major
, &matrix_type
,
584 /* Now emit writes from the temporary to memory */
585 ir_variable
*write_offset
=
586 new(mem_ctx
) ir_variable(glsl_type::uint_type
,
587 "ssbo_store_temp_offset",
590 base_ir
->insert_before(write_offset
);
591 base_ir
->insert_before(assign(write_offset
, offset
));
593 deref
= new(mem_ctx
) ir_dereference_variable(write_var
);
594 emit_access(mem_ctx
, true, deref
, write_offset
, const_offset
,
595 row_major
, matrix_type
, packing
, write_mask
);
/**
 * Expression hook: runs
 * check_ssbo_unsized_array_length_expression() on \c ir first, then
 * returns the result of rvalue_visit().
 */
599 lower_ubo_reference_visitor::visit_enter(ir_expression
*ir
)
601 check_ssbo_unsized_array_length_expression(ir
);
602 return rvalue_visit(ir
);
/**
 * Lowers an \c ir_unop_ssbo_unsized_array_length expression.
 *
 * The visible checks are:
 *  - the operation is \c ir_unop_ssbo_unsized_array_length,
 *  - operand 0 is an rvalue that is an unsized array,
 *  - operand 0 is a dereference,
 *  - the referenced variable is in a shader storage block.
 *
 * When they hold, the work is delegated to
 * process_ssbo_unsized_array_length().
 *
 * NOTE(review): the values returned on the early-exit paths are not
 * visible in this dump; callers appear to treat the result as optional.
 */
606 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression
*expr
)
608 if (expr
->operation
!=
609 ir_expression_operation(ir_unop_ssbo_unsized_array_length
))
612 ir_rvalue
*rvalue
= expr
->operands
[0]->as_rvalue();
614 !rvalue
->type
->is_array() || !rvalue
->type
->is_unsized_array())
617 ir_dereference
*deref
= expr
->operands
[0]->as_dereference();
621 ir_variable
*var
= expr
->operands
[0]->variable_referenced();
622 if (!var
|| !var
->is_in_shader_storage_block())
624 return process_ssbo_unsized_array_length(&rvalue
, deref
, var
);
/**
 * Replaces unsized-array-length operands nested inside \c ir.
 *
 * Each operand that is itself an expression is passed to
 * calculate_ssbo_unsized_array_length(), and the operand slot is
 * overwritten with the result.
 *
 * NOTE(review): the first test on \c ir, the skip statement for
 * non-expression operands and any guard on \c temp before the store are
 * not fully visible in this dump.
 */
628 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression
*ir
)
631 ir_expression_operation(ir_unop_ssbo_unsized_array_length
)) {
632 /* Don't replace this unop if it is found alone. It is going to be
633 * removed by the optimization passes or replaced if it is part of
634 * an ir_assignment or another ir_expression.
639 for (unsigned i
= 0; i
< ir
->num_operands
; i
++) {
640 if (ir
->operands
[i
]->ir_type
!= ir_type_expression
)
642 ir_expression
*expr
= (ir_expression
*) ir
->operands
[i
];
643 ir_expression
*temp
= calculate_ssbo_unsized_array_length(expr
);
648 ir
->operands
[i
] = temp
;
/**
 * Handles an assignment whose right-hand side is an unsized-array-length
 * expression.
 *
 * The body tests that \c ir->rhs exists and is an expression, then passes
 * it to calculate_ssbo_unsized_array_length().
 *
 * NOTE(review): what is done with \c temp (presumably it replaces
 * \c ir->rhs when non-NULL) is not visible in this dump.
 */
653 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment
*ir
)
655 if (!ir
->rhs
|| ir
->rhs
->ir_type
!= ir_type_expression
)
658 ir_expression
*expr
= (ir_expression
*) ir
->rhs
;
659 ir_expression
*temp
= calculate_ssbo_unsized_array_length(expr
);
/**
 * Builds an \c ir_unop_get_buffer_size expression for the current block.
 *
 * The block reference is a clone of \c this->uniform_block.
 *
 * NOTE(review): the remaining constructor arguments are not visible in
 * this dump.
 */
669 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx
)
671 ir_rvalue
*block_ref
= this->uniform_block
->clone(mem_ctx
, NULL
);
672 return new(mem_ctx
) ir_expression(ir_unop_get_buffer_size
,
/**
 * Computes the element stride of an unsized SSBO array.
 *
 * \param deref   dereference of the unsized array
 * \param packing interface packing of the enclosing block
 *
 * The element type is found in one of two ways:
 *  - variable dereference: the element type of the variable's array type;
 *  - record dereference: the element type of the last field of the
 *    interface type.
 *
 * Any other dereference kind hits unreachable().
 *
 * Row-majorness comes from is_dereferenced_thing_row_major().  With
 * GLSL_INTERFACE_PACKING_STD430 the stride is std430_array_stride();
 * otherwise it is std140_size() rounded up to a multiple of 16 with
 * glsl_align().
 *
 * NOTE(review): the return statement is not visible in this dump;
 * presumably \c array_stride is returned.
 */
678 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference
*deref
,
679 enum glsl_interface_packing packing
)
681 unsigned array_stride
= 0;
683 switch (deref
->ir_type
) {
684 case ir_type_dereference_variable
:
686 ir_dereference_variable
*deref_var
= (ir_dereference_variable
*)deref
;
687 const struct glsl_type
*unsized_array_type
= NULL
;
688 /* An unsized array can be sized by other lowering passes, so pick
689 * the first field of the array which has the data type of the unsized
692 unsized_array_type
= deref_var
->var
->type
->fields
.array
;
694 /* Whether or not the field is row-major (because it might be a
695 * bvec2 or something) does not affect the array itself. We need
696 * to know whether an array element in its entirety is row-major.
698 const bool array_row_major
=
699 is_dereferenced_thing_row_major(deref_var
);
701 if (packing
== GLSL_INTERFACE_PACKING_STD430
) {
702 array_stride
= unsized_array_type
->std430_array_stride(array_row_major
);
704 array_stride
= unsized_array_type
->std140_size(array_row_major
);
705 array_stride
= glsl_align(array_stride
, 16);
709 case ir_type_dereference_record
:
711 ir_dereference_record
*deref_record
= (ir_dereference_record
*) deref
;
712 ir_dereference
*interface_deref
=
713 deref_record
->record
->as_dereference();
714 assert(interface_deref
!= NULL
);
715 const struct glsl_type
*interface_type
= interface_deref
->type
;
716 unsigned record_length
= interface_type
->length
;
717 /* Unsized array is always the last element of the interface */
718 const struct glsl_type
*unsized_array_type
=
719 interface_type
->fields
.structure
[record_length
- 1].type
->fields
.array
;
721 const bool array_row_major
=
722 is_dereferenced_thing_row_major(deref_record
);
724 if (packing
== GLSL_INTERFACE_PACKING_STD430
) {
725 array_stride
= unsized_array_type
->std430_array_stride(array_row_major
);
727 array_stride
= unsized_array_type
->std140_size(array_row_major
);
728 array_stride
= glsl_align(array_stride
, 16);
733 unreachable("Unsupported dereference type");
/**
 * Builds the IR that computes the runtime length of an unsized SSBO array.
 *
 * The memory context is the ralloc parent of \c *rvalue.  The body computes
 * the packing and element stride, sets \c buffer_access_type to
 * \c ssbo_unsized_array_length_access, and calls setup_for_load_or_store()
 * for the array's base and constant offsets.
 *
 * The expression tree built is:
 *
 *    max((get_buffer_size - u2i(base_offset + const_offset)) / stride, 0)
 *
 * The stride is held in an \c int local although
 * calculate_unsized_array_stride() returns \c unsigned.
 *
 * NOTE(review): the return statement is not visible in this dump;
 * presumably the \c max expression is returned.
 */
739 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue
**rvalue
,
740 ir_dereference
*deref
,
743 void *mem_ctx
= ralloc_parent(*rvalue
);
745 ir_rvalue
*base_offset
= NULL
;
746 unsigned const_offset
;
748 const glsl_type
*matrix_type
;
750 enum glsl_interface_packing packing
=
751 var
->get_interface_type()->
752 get_internal_ifc_packing(use_std430_as_default
);
753 int unsized_array_stride
=
754 calculate_unsized_array_stride(deref
, packing
);
756 this->buffer_access_type
= ssbo_unsized_array_length_access
;
757 this->variable
= var
;
759 /* Compute the offset to the start if the dereference as well as other
760 * information we need to calculate the length.
762 setup_for_load_or_store(mem_ctx
, var
, deref
,
763 &base_offset
, &const_offset
,
764 &row_major
, &matrix_type
,
767 * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
769 ir_expression
*buffer_size
= emit_ssbo_get_buffer_size(mem_ctx
);
771 ir_expression
*offset_of_array
= new(mem_ctx
)
772 ir_expression(ir_binop_add
, base_offset
,
773 new(mem_ctx
) ir_constant(const_offset
));
774 ir_expression
*offset_of_array_int
= new(mem_ctx
)
775 ir_expression(ir_unop_u2i
, offset_of_array
);
777 ir_expression
*sub
= new(mem_ctx
)
778 ir_expression(ir_binop_sub
, buffer_size
, offset_of_array_int
);
779 ir_expression
*div
= new(mem_ctx
)
780 ir_expression(ir_binop_div
, sub
,
781 new(mem_ctx
) ir_constant(unsized_array_stride
));
782 ir_expression
*max
= new(mem_ctx
)
783 ir_expression(ir_binop_max
, div
, new(mem_ctx
) ir_constant(0));
/**
 * Rewrites an assignment whose LHS is a shader-storage-block variable.
 *
 * When the LHS references a variable in a shader storage block, a
 * temporary of the LHS type is declared before \c base_ir and \c ir->lhs
 * is redirected to it.  write_to_memory() is then called with the original
 * dereference and \c ir->write_mask to emit the stores back to the buffer.
 *
 * NOTE(review): the early-return statements after the rvalue, dereference
 * and variable checks are not visible in this dump.
 */
789 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment
*ir
)
794 ir_rvalue
*rvalue
= ir
->lhs
->as_rvalue();
798 ir_dereference
*deref
= ir
->lhs
->as_dereference();
802 ir_variable
*var
= ir
->lhs
->variable_referenced();
803 if (!var
|| !var
->is_in_shader_storage_block())
806 /* We have a write to a buffer variable, so declare a temporary and rewrite
807 * the assignment so that the temporary is the LHS.
809 void *mem_ctx
= ralloc_parent(shader
->ir
);
811 const glsl_type
*type
= rvalue
->type
;
812 ir_variable
*write_var
= new(mem_ctx
) ir_variable(type
,
815 base_ir
->insert_before(write_var
);
816 ir
->lhs
= new(mem_ctx
) ir_dereference_variable(write_var
);
818 /* Now we have to write the value assigned to the temporary back to memory */
819 write_to_memory(mem_ctx
, deref
, var
, write_var
, ir
->write_mask
);
/**
 * Returns true when \c var is in a buffer block or its mode is
 * \c ir_var_shader_shared.
 */
824 is_buffer_backed_variable(ir_variable
*var
)
826 return var
->is_in_buffer_block() ||
827 var
->data
.mode
== ir_var_shader_shared
;
/**
 * Splits a whole-array copy from a buffer-backed variable into one
 * assignment per element.
 *
 * Visible preconditions: \c ir, its LHS and its RHS are non-NULL, both
 * sides have array type, the RHS references a buffer-backed variable
 * (is_buffer_backed_variable()), and both sides are dereferences.  The
 * array lengths are asserted equal.
 *
 * For each index i, \c lhs[i] = \c rhs[i] is built from clones of the two
 * dereferences and inserted after \c ir.
 *
 * NOTE(review): the return statements and whatever happens to the original
 * assignment afterwards (presumably it is removed and progress is flagged)
 * are not visible in this dump.
 */
831 lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment
*ir
)
833 if (!ir
|| !ir
->lhs
|| !ir
->rhs
)
836 /* LHS and RHS must be arrays
837 * FIXME: arrays of arrays?
839 if (!ir
->lhs
->type
->is_array() || !ir
->rhs
->type
->is_array())
842 /* RHS must be a buffer-backed variable. This is what can cause the problem
843 * since it would lead to a series of loads that need to live until we
844 * see the writes to the LHS.
846 ir_variable
*rhs_var
= ir
->rhs
->variable_referenced();
847 if (!rhs_var
|| !is_buffer_backed_variable(rhs_var
))
850 /* Split the array copy into individual element copies to reduce
853 ir_dereference
*rhs_deref
= ir
->rhs
->as_dereference();
857 ir_dereference
*lhs_deref
= ir
->lhs
->as_dereference();
861 assert(lhs_deref
->type
->length
== rhs_deref
->type
->length
);
862 void *mem_ctx
= ralloc_parent(shader
->ir
);
864 for (unsigned i
= 0; i
< lhs_deref
->type
->length
; i
++) {
865 ir_dereference
*lhs_i
=
866 new(mem_ctx
) ir_dereference_array(lhs_deref
->clone(mem_ctx
, NULL
),
867 new(mem_ctx
) ir_constant(i
));
869 ir_dereference
*rhs_i
=
870 new(mem_ctx
) ir_dereference_array(rhs_deref
->clone(mem_ctx
, NULL
),
871 new(mem_ctx
) ir_constant(i
));
872 ir
->insert_after(assign(lhs_i
, rhs_i
));
/**
 * Splits a whole-struct copy from a buffer-backed variable into one
 * assignment per field.
 *
 * Visible preconditions: \c ir, its LHS and its RHS are non-NULL, both
 * sides have struct type, the RHS references a buffer-backed variable
 * (is_buffer_backed_variable()), and both sides are dereferences.  The two
 * types are asserted identical.
 *
 * For each field, record dereferences of the cloned LHS and RHS are built
 * and their assignment is inserted after \c ir.
 *
 * NOTE(review): the field-name argument lines, the return statements and
 * whatever happens to the original assignment afterwards are not visible
 * in this dump.
 */
881 lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment
*ir
)
883 if (!ir
|| !ir
->lhs
|| !ir
->rhs
)
886 /* LHS and RHS must be records */
887 if (!ir
->lhs
->type
->is_struct() || !ir
->rhs
->type
->is_struct())
890 /* RHS must be a buffer-backed variable. This is what can cause the problem
891 * since it would lead to a series of loads that need to live until we
892 * see the writes to the LHS.
894 ir_variable
*rhs_var
= ir
->rhs
->variable_referenced();
895 if (!rhs_var
|| !is_buffer_backed_variable(rhs_var
))
898 /* Split the struct copy into individual element copies to reduce
901 ir_dereference
*rhs_deref
= ir
->rhs
->as_dereference();
905 ir_dereference
*lhs_deref
= ir
->lhs
->as_dereference();
909 assert(lhs_deref
->type
== rhs_deref
->type
);
910 void *mem_ctx
= ralloc_parent(shader
->ir
);
912 for (unsigned i
= 0; i
< lhs_deref
->type
->length
; i
++) {
913 const char *field_name
= lhs_deref
->type
->fields
.structure
[i
].name
;
914 ir_dereference
*lhs_field
=
915 new(mem_ctx
) ir_dereference_record(lhs_deref
->clone(mem_ctx
, NULL
),
917 ir_dereference
*rhs_field
=
918 new(mem_ctx
) ir_dereference_record(rhs_deref
->clone(mem_ctx
, NULL
),
920 ir
->insert_after(assign(lhs_field
, rhs_field
));
/**
 * Assignment hook.
 *
 * The array-copy and struct-copy splitters are tried first, in that order.
 * When either reports true the visitor returns
 * \c visit_continue_with_parent.  Otherwise the unsized-array-length check
 * and the SSBO-store rewrite are applied, and rvalue_visit() handles the
 * rest.
 */
929 lower_ubo_reference_visitor::visit_enter(ir_assignment
*ir
)
931 /* Array and struct copies could involve large amounts of load/store
932 * operations. To improve register pressure we want to special-case
933 * these and split them into individual element copies.
934 * This way we avoid emitting all the loads for the RHS first and
935 * all the writes for the LHS second and register usage is more
938 if (check_for_buffer_array_copy(ir
))
939 return visit_continue_with_parent
;
941 if (check_for_buffer_struct_copy(ir
))
942 return visit_continue_with_parent
;
944 check_ssbo_unsized_array_length_assignment(ir
);
945 check_for_ssbo_store(ir
);
946 return rvalue_visit(ir
);
/**
 * Rewrites a generic atomic intrinsic call on an SSBO variable into the
 * block-index/offset form.
 *
 * Asserted preconditions:
 *  - the call has 2 or 3 actual parameters,
 *  - the first parameter is a variable/array/record dereference or a
 *    swizzle,
 *  - its type is scalar and integer or float,
 *  - setup_for_load_or_store() reports no matrix type,
 *  - the callee intrinsic id lies between \c ir_intrinsic_generic_load and
 *    \c ir_intrinsic_generic_atomic_comp_swap.
 *
 * The new signature takes "block_ref" (uint), "offset" (uint), "data1" and,
 * for three-parameter calls, "data2"; the data parameters use the scalar
 * type of the dereference.  Its intrinsic id is
 * MAP_INTRINSIC_TO_TYPE(id, ssbo) and the function name is the callee name
 * with "_ssbo" appended.
 *
 * The new call passes a clone of \c uniform_block,
 * offset + const_offset, and clones of the remaining original arguments.
 * The original return dereference is cloned as well.
 *
 * NOTE(review): the name is formatted with sprintf() into \c func_name,
 * whose declaration is not visible in this dump; confirm the buffer is
 * large enough for every callee name, or switch to snprintf().
 */
949 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
950 * access to the buffer variable in the first parameter by an offset
951 * and block index. This involves creating the new internal intrinsic
952 * (i.e. the new function signature).
955 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call
*ir
)
957 /* SSBO atomics usually have 2 parameters, the buffer variable and an
958 * integer argument. The exception is CompSwap, that has an additional
961 int param_count
= ir
->actual_parameters
.length();
962 assert(param_count
== 2 || param_count
== 3);
964 /* First argument must be a scalar integer buffer variable */
965 exec_node
*param
= ir
->actual_parameters
.get_head();
966 ir_instruction
*inst
= (ir_instruction
*) param
;
967 assert(inst
->ir_type
== ir_type_dereference_variable
||
968 inst
->ir_type
== ir_type_dereference_array
||
969 inst
->ir_type
== ir_type_dereference_record
||
970 inst
->ir_type
== ir_type_swizzle
);
972 ir_rvalue
*deref
= (ir_rvalue
*) inst
;
973 assert(deref
->type
->is_scalar() &&
974 (deref
->type
->is_integer() || deref
->type
->is_float()));
976 ir_variable
*var
= deref
->variable_referenced();
979 /* Compute the offset to the start if the dereference and the
982 void *mem_ctx
= ralloc_parent(shader
->ir
);
984 ir_rvalue
*offset
= NULL
;
985 unsigned const_offset
;
987 const glsl_type
*matrix_type
;
989 enum glsl_interface_packing packing
=
990 var
->get_interface_type()->
991 get_internal_ifc_packing(use_std430_as_default
);
993 this->buffer_access_type
= ssbo_atomic_access
;
994 this->variable
= var
;
996 setup_for_load_or_store(mem_ctx
, var
, deref
,
997 &offset
, &const_offset
,
998 &row_major
, &matrix_type
,
1002 assert(matrix_type
== NULL
);
1004 ir_rvalue
*deref_offset
=
1005 add(offset
, new(mem_ctx
) ir_constant(const_offset
));
1006 ir_rvalue
*block_index
= this->uniform_block
->clone(mem_ctx
, NULL
);
1008 /* Create the new internal function signature that will take a block
1009 * index and offset instead of a buffer variable
1011 exec_list sig_params
;
1012 ir_variable
*sig_param
= new(mem_ctx
)
1013 ir_variable(glsl_type::uint_type
, "block_ref" , ir_var_function_in
);
1014 sig_params
.push_tail(sig_param
);
1016 sig_param
= new(mem_ctx
)
1017 ir_variable(glsl_type::uint_type
, "offset" , ir_var_function_in
);
1018 sig_params
.push_tail(sig_param
);
1020 const glsl_type
*type
= deref
->type
->get_scalar_type();
1021 sig_param
= new(mem_ctx
)
1022 ir_variable(type
, "data1", ir_var_function_in
);
1023 sig_params
.push_tail(sig_param
);
1025 if (param_count
== 3) {
1026 sig_param
= new(mem_ctx
)
1027 ir_variable(type
, "data2", ir_var_function_in
);
1028 sig_params
.push_tail(sig_param
);
1031 ir_function_signature
*sig
=
1032 new(mem_ctx
) ir_function_signature(deref
->type
,
1033 shader_storage_buffer_object
);
1035 sig
->replace_parameters(&sig_params
);
1037 assert(ir
->callee
->intrinsic_id
>= ir_intrinsic_generic_load
);
1038 assert(ir
->callee
->intrinsic_id
<= ir_intrinsic_generic_atomic_comp_swap
);
1039 sig
->intrinsic_id
= MAP_INTRINSIC_TO_TYPE(ir
->callee
->intrinsic_id
, ssbo
);
1042 sprintf(func_name
, "%s_ssbo", ir
->callee_name());
1043 ir_function
*f
= new(mem_ctx
) ir_function(func_name
);
1044 f
->add_signature(sig
);
1046 /* Now, create the call to the internal intrinsic */
1047 exec_list call_params
;
1048 call_params
.push_tail(block_index
);
1049 call_params
.push_tail(deref_offset
);
1050 param
= ir
->actual_parameters
.get_head()->get_next();
1051 ir_rvalue
*param_as_rvalue
= ((ir_instruction
*) param
)->as_rvalue();
1052 call_params
.push_tail(param_as_rvalue
->clone(mem_ctx
, NULL
));
1053 if (param_count
== 3) {
1054 param
= param
->get_next();
1055 param_as_rvalue
= ((ir_instruction
*) param
)->as_rvalue();
1056 call_params
.push_tail(param_as_rvalue
->clone(mem_ctx
, NULL
));
1058 ir_dereference_variable
*return_deref
=
1059 ir
->return_deref
->clone(mem_ctx
, NULL
);
1060 return new(mem_ctx
) ir_call(sig
, return_deref
, &call_params
);
/**
 * Decides whether a call is a generic atomic on an SSBO variable and, if
 * so, lowers it with lower_ssbo_atomic_intrinsic().
 *
 * Visible checks:
 *  - the call has 2 or 3 actual parameters,
 *  - the first parameter is an rvalue whose referenced variable is in a
 *    shader storage block,
 *  - the callee intrinsic id is one of the generic atomic
 *    add/min/max/and/or/xor/exchange/comp_swap ids.
 *
 * NOTE(review): the values returned when a check fails are not visible in
 * this dump; presumably the original \c ir is returned unchanged.
 */
1064 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call
*ir
)
1066 exec_list
& params
= ir
->actual_parameters
;
1068 if (params
.length() < 2 || params
.length() > 3)
1072 ((ir_instruction
*) params
.get_head())->as_rvalue();
1076 ir_variable
*var
= rvalue
->variable_referenced();
1077 if (!var
|| !var
->is_in_shader_storage_block())
1080 const enum ir_intrinsic_id id
= ir
->callee
->intrinsic_id
;
1081 if (id
== ir_intrinsic_generic_atomic_add
||
1082 id
== ir_intrinsic_generic_atomic_min
||
1083 id
== ir_intrinsic_generic_atomic_max
||
1084 id
== ir_intrinsic_generic_atomic_and
||
1085 id
== ir_intrinsic_generic_atomic_or
||
1086 id
== ir_intrinsic_generic_atomic_xor
||
1087 id
== ir_intrinsic_generic_atomic_exchange
||
1088 id
== ir_intrinsic_generic_atomic_comp_swap
) {
1089 return lower_ssbo_atomic_intrinsic(ir
);
/**
 * Call hook.
 *
 * The call is passed to check_for_ssbo_atomic_intrinsic().  On the
 * lowering path \c base_ir is replaced with the new call and
 * \c visit_continue_with_parent is returned; otherwise rvalue_visit()
 * handles the call.
 *
 * NOTE(review): the condition selecting the replacement path is not
 * visible in this dump; presumably it tests whether \c new_ir differs
 * from \c ir.
 */
1097 lower_ubo_reference_visitor::visit_enter(ir_call
*ir
)
1099 ir_call
*new_ir
= check_for_ssbo_atomic_intrinsic(ir
);
1102 base_ir
->replace_with(new_ir
);
1103 return visit_continue_with_parent
;
1106 return rvalue_visit(ir
);
/**
 * Texture hook.
 *
 * When the sampler is a record dereference, handle_rvalue() is applied to
 * \c ir->sampler in place and \c visit_continue_with_parent is returned.
 * Any other sampler goes through rvalue_visit().
 */
1111 lower_ubo_reference_visitor::visit_enter(ir_texture
*ir
)
1113 ir_dereference
*sampler
= ir
->sampler
;
1115 if (sampler
->ir_type
== ir_type_dereference_record
) {
1116 handle_rvalue((ir_rvalue
**)&ir
->sampler
);
1117 return visit_continue_with_parent
;
1120 return rvalue_visit(ir
);
1124 } /* unnamed namespace */
/**
 * Pass entry point.
 *
 * Builds a lower_ubo_reference_visitor from the arguments and runs it over
 * \c shader->ir with visit_list_elements().  The run is repeated for as
 * long as the visitor reports progress.
 *
 * NOTE(review): the loop header and the reset of \c v.progress are not
 * visible in this dump.
 */
1127 lower_ubo_reference(struct gl_linked_shader
*shader
,
1128 bool clamp_block_indices
, bool use_std430_as_default
)
1130 lower_ubo_reference_visitor
v(shader
, clamp_block_indices
,
1131 use_std430_as_default
);
1133 /* Loop over the instructions lowering references, because we take
1134 * a deref of a UBO array using a UBO dereference as the index will
1135 * produce a collection of instructions all of which have cloned
1136 * UBO dereferences for that array index.
1140 visit_list_elements(&v
, shader
->ir
);
1141 } while (v
.progress
);