glsl: Add ir_function_signature::is_intrinsic() method
[mesa.git] / src / compiler / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
#include <cstdio>

#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "main/macros.h"
#include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
43 namespace {
/* IR visitor that rewrites accesses to variables living in UBO/SSBO blocks
 * into explicit block-index + byte-offset operations (ubo_load expressions,
 * SSBO load/store/atomic intrinsic calls).
 */
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_linked_shader *shader,
                               bool clamp_block_indices)
   : shader(shader), clamp_block_indices(clamp_block_indices),
     struct_field(NULL), variable(NULL)
   {
   }

   /* Replace a read of a buffer-block variable with lowered loads. */
   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /* Compute the block index (this->uniform_block), base offset and layout
    * information (row_major, matrix_columns) for a dereference.
    */
   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                enum glsl_interface_packing packing);
   /* ACCESS_* qualifier bits (coherent/restrict/volatile) for the access. */
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* Kind of access currently being lowered; selects which block list is
    * searched in setup_for_load_or_store() and which IR is emitted by
    * insert_buffer_access().
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   /* lower_buffer_access callback that emits one vec4-or-smaller access. */
   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           enum glsl_interface_packing packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
   /* Block member currently being accessed (set per lowering operation). */
   struct gl_uniform_buffer_variable *ubo_var;
   /* Struct field metadata filled in by setup_buffer_access(), if any. */
   const struct glsl_struct_field *struct_field;
   /* Variable whose access is currently being lowered. */
   ir_variable *variable;
   /* Rvalue yielding the (possibly dynamic) block binding index. */
   ir_rvalue *uniform_block;
   /* Set whenever a rewrite happened; drives the fixed-point loop. */
   bool progress;
};
116
/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: 'd' now points at the outermost dereference that still
    * belongs to the block itself.  Walk down to the variable, splicing
    * array subscripts into the block name as we go.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         /* Only an instanced block array needs the rewritten name with
          * subscripts; in all other cases the plain base name identifies
          * the block and any accumulated dynamic index is discarded.
          */
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            /* Dynamic index: name this dimension "[0]" and fold the runtime
             * index (converted to uint, scaled by the inner array size for
             * arrays of arrays) into *nonconst_block_index.
             */
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}
247
248 static ir_rvalue *
249 clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
250 {
251 assert(type->is_array());
252
253 const unsigned array_size = type->arrays_of_arrays_size();
254
255 ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
256 max_index->type = index->type;
257
258 ir_constant *zero = new(mem_ctx) ir_constant(0);
259 zero->type = index->type;
260
261 if (index->type->base_type == GLSL_TYPE_INT)
262 index = max2(index, zero);
263 index = min2(index, max_index);
264
265 return index;
266 }
267
void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     enum glsl_interface_packing packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Optionally clamp a dynamic block-array index to the declared bounds
    * (requested by the driver via lower_ubo_reference()'s flag).
    */
   if (nonconst_block_index && clamp_block_indices) {
      nonconst_block_index =
         clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
   }

   /* Locate the block by interface name */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->NumShaderStorageBlocks;
      blocks = shader->ShaderStorageBlocks;
   } else {
      num_blocks = shader->NumUniformBlocks;
      blocks = shader->UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* For a dynamically indexed block array the final block index is
          * the matched block's index plus the runtime part.
          */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         /* An interface instance addresses the block's first uniform;
          * otherwise data.location selects the member within the block.
          */
         this->ubo_var = var->is_interface_instance()
            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];

         break;
      }
   }

   assert(this->uniform_block);

   *const_offset = ubo_var->Offset;

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
}
326
/* Replace a read of a UBO/SSBO variable: emit loads into a fresh temporary
 * and rewrite *rvalue to dereference that temporary instead.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Materialize the (dynamic) offset once so each emitted chunk reads
    * from the same computed value.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
385
386 ir_expression *
387 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
388 const glsl_type *type,
389 ir_rvalue *offset)
390 {
391 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
392 return new(mem_ctx)
393 ir_expression(ir_binop_ubo_load,
394 type,
395 block_ref,
396 offset);
397
398 }
399
400 static bool
401 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
402 {
403 return state->has_shader_storage_buffer_objects();
404 }
405
406 uint32_t
407 lower_ubo_reference_visitor::ssbo_access_params()
408 {
409 assert(variable);
410
411 if (variable->is_interface_instance()) {
412 assert(struct_field);
413
414 return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
415 (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
416 (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
417 } else {
418 return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
419 (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
420 (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
421 }
422 }
423
/* Build a call to __intrinsic_store_ssbo(block_ref, offset, value,
 * write_mask, access) that stores 'deref' at 'offset' in the current block.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value" , ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   /* The store returns void and is flagged as an intrinsic so drivers can
    * recognize and lower it.
    */
   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->_is_intrinsic = true;
   sig->intrinsic_id = ir_intrinsic_ssbo_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   /* Actual arguments must match the signature parameter order above. */
   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
470
471 ir_call *
472 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
473 const struct glsl_type *type,
474 ir_rvalue *offset)
475 {
476 exec_list sig_params;
477
478 ir_variable *block_ref = new(mem_ctx)
479 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
480 sig_params.push_tail(block_ref);
481
482 ir_variable *offset_ref = new(mem_ctx)
483 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
484 sig_params.push_tail(offset_ref);
485
486 ir_variable *access_ref = new(mem_ctx)
487 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
488 sig_params.push_tail(access_ref);
489
490 ir_function_signature *sig =
491 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
492 assert(sig);
493 sig->replace_parameters(&sig_params);
494 sig->_is_intrinsic = true;
495 sig->intrinsic_id = ir_intrinsic_ssbo_load;
496
497 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
498 f->add_signature(sig);
499
500 ir_variable *result = new(mem_ctx)
501 ir_variable(type, "ssbo_load_result", ir_var_temporary);
502 base_ir->insert_before(result);
503 ir_dereference_variable *deref_result = new(mem_ctx)
504 ir_dereference_variable(result);
505
506 exec_list call_params;
507 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
508 call_params.push_tail(offset->clone(mem_ctx, NULL));
509 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
510
511 return new(mem_ctx) ir_call(sig, deref_result, &call_params);
512 }
513
/* lower_buffer_access callback: emit the IR for a single vector-sized chunk
 * of the current access, according to buffer_access_type.
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      /* SSBO loads are calls: emit the call first, then copy its result
       * into the destination temporary.
       */
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores go after base_ir so the source temporary has already been
       * written by the original assignment.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
550
/* Emit the SSBO stores that copy 'write_var' (the temporary that replaced
 * the original LHS) back to the buffer location named by 'deref'/'var'.
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}
589
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   /* Replace any SSBO unsized-array .length() sub-expressions before the
    * generic rvalue lowering visits this expression tree.
    */
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}
596
/* If 'expr' is an ir_unop_ssbo_unsized_array_length applied to an unsized
 * array inside a shader storage block, return the lowered length
 * computation; otherwise return NULL.
 */
ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}
618
/* Scan the operands of 'ir' and replace any SSBO unsized-array .length()
 * sub-expressions with their lowered computation.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      delete expr;
      ir->operands[i] = temp;
   }
}
643
644 void
645 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
646 {
647 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
648 return;
649
650 ir_expression *expr = (ir_expression *) ir->rhs;
651 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
652 if (!temp)
653 return;
654
655 delete expr;
656 ir->rhs = temp;
657 return;
658 }
659
660 ir_expression *
661 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
662 {
663 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
664 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
665 glsl_type::int_type,
666 block_ref);
667 }
668
/* Compute the byte stride of one element of the unsized array named by
 * 'deref', honoring the block's packing (std140 vs std430) and row-major
 * layout.
 */
unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            enum glsl_interface_packing packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         /* std140: element size rounded up to a vec4 boundary. */
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* Unsized array is always the last element of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}
729
/* Lower array.length() on an unsized SSBO array into an expression computed
 * from the runtime buffer size, the array's offset and its element stride.
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* The subtraction below is done in signed int, so convert the uint
    * offset first.
    */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   /* Clamp to zero in case the buffer is smaller than the array's offset. */
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
775
/* If 'ir' assigns to an SSBO variable, redirect the assignment into a fresh
 * temporary and emit the stores that copy the temporary back to memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}
810
811 static bool
812 is_buffer_backed_variable(ir_variable *var)
813 {
814 return var->is_in_buffer_block() ||
815 var->data.mode == ir_var_shader_shared;
816 }
817
/* Split an array copy whose RHS is buffer-backed into per-element copies.
 * Returns true (and removes 'ir') if the split was performed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one element-wise assignment per array element; the later visits
    * of these new assignments perform the actual lowering.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
867
/* Split a struct copy whose RHS is buffer-backed into per-field copies.
 * Returns true (and removes 'ir') if the split was performed.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one field-wise assignment per struct member; the later visits of
    * these new assignments perform the actual lowering.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}
915
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   /* Handle .length() on the RHS and SSBO writes on the LHS before the
    * generic rvalue lowering visits the assignment's operands.
    */
   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}
936
937 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
938 * access to the buffer variable in the first parameter by an offset
939 * and block index. This involves creating the new internal intrinsic
940 * (i.e. the new function signature).
941 */
942 ir_call *
943 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
944 {
945 /* SSBO atomics usually have 2 parameters, the buffer variable and an
946 * integer argument. The exception is CompSwap, that has an additional
947 * integer parameter.
948 */
949 int param_count = ir->actual_parameters.length();
950 assert(param_count == 2 || param_count == 3);
951
952 /* First argument must be a scalar integer buffer variable */
953 exec_node *param = ir->actual_parameters.get_head();
954 ir_instruction *inst = (ir_instruction *) param;
955 assert(inst->ir_type == ir_type_dereference_variable ||
956 inst->ir_type == ir_type_dereference_array ||
957 inst->ir_type == ir_type_dereference_record ||
958 inst->ir_type == ir_type_swizzle);
959
960 ir_rvalue *deref = (ir_rvalue *) inst;
961 assert(deref->type->is_scalar() && deref->type->is_integer());
962
963 ir_variable *var = deref->variable_referenced();
964 assert(var);
965
966 /* Compute the offset to the start if the dereference and the
967 * block index
968 */
969 void *mem_ctx = ralloc_parent(shader->ir);
970
971 ir_rvalue *offset = NULL;
972 unsigned const_offset;
973 bool row_major;
974 int matrix_columns;
975 enum glsl_interface_packing packing = var->get_interface_type_packing();
976
977 this->buffer_access_type = ssbo_atomic_access;
978 this->variable = var;
979
980 setup_for_load_or_store(mem_ctx, var, deref,
981 &offset, &const_offset,
982 &row_major, &matrix_columns,
983 packing);
984 assert(offset);
985 assert(!row_major);
986 assert(matrix_columns == 1);
987
988 ir_rvalue *deref_offset =
989 add(offset, new(mem_ctx) ir_constant(const_offset));
990 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
991
992 /* Create the new internal function signature that will take a block
993 * index and offset instead of a buffer variable
994 */
995 exec_list sig_params;
996 ir_variable *sig_param = new(mem_ctx)
997 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
998 sig_params.push_tail(sig_param);
999
1000 sig_param = new(mem_ctx)
1001 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1002 sig_params.push_tail(sig_param);
1003
1004 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
1005 glsl_type::int_type : glsl_type::uint_type;
1006 sig_param = new(mem_ctx)
1007 ir_variable(type, "data1", ir_var_function_in);
1008 sig_params.push_tail(sig_param);
1009
1010 if (param_count == 3) {
1011 sig_param = new(mem_ctx)
1012 ir_variable(type, "data2", ir_var_function_in);
1013 sig_params.push_tail(sig_param);
1014 }
1015
1016 ir_function_signature *sig =
1017 new(mem_ctx) ir_function_signature(deref->type,
1018 shader_storage_buffer_object);
1019 assert(sig);
1020 sig->replace_parameters(&sig_params);
1021 sig->_is_intrinsic = true;
1022
1023 assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
1024 assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
1025 sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);
1026
1027 char func_name[64];
1028 sprintf(func_name, "%s_ssbo", ir->callee_name());
1029 ir_function *f = new(mem_ctx) ir_function(func_name);
1030 f->add_signature(sig);
1031
1032 /* Now, create the call to the internal intrinsic */
1033 exec_list call_params;
1034 call_params.push_tail(block_index);
1035 call_params.push_tail(deref_offset);
1036 param = ir->actual_parameters.get_head()->get_next();
1037 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1038 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1039 if (param_count == 3) {
1040 param = param->get_next();
1041 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1042 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1043 }
1044 ir_dereference_variable *return_deref =
1045 ir->return_deref->clone(mem_ctx, NULL);
1046 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1047 }
1048
1049 ir_call *
1050 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1051 {
1052 exec_list& params = ir->actual_parameters;
1053
1054 if (params.length() < 2 || params.length() > 3)
1055 return ir;
1056
1057 ir_rvalue *rvalue =
1058 ((ir_instruction *) params.get_head())->as_rvalue();
1059 if (!rvalue)
1060 return ir;
1061
1062 ir_variable *var = rvalue->variable_referenced();
1063 if (!var || !var->is_in_shader_storage_block())
1064 return ir;
1065
1066 const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
1067 if (id == ir_intrinsic_generic_atomic_add ||
1068 id == ir_intrinsic_generic_atomic_min ||
1069 id == ir_intrinsic_generic_atomic_max ||
1070 id == ir_intrinsic_generic_atomic_and ||
1071 id == ir_intrinsic_generic_atomic_or ||
1072 id == ir_intrinsic_generic_atomic_xor ||
1073 id == ir_intrinsic_generic_atomic_exchange ||
1074 id == ir_intrinsic_generic_atomic_comp_swap) {
1075 return lower_ssbo_atomic_intrinsic(ir);
1076 }
1077
1078 return ir;
1079 }
1080
1081
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   /* An SSBO atomic intrinsic is replaced with an entirely new call; in
    * that case skip visiting the old call's children.
    */
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}
1094
1095
1096 } /* unnamed namespace */
1097
/* Pass entry point: lower all UBO/SSBO variable references in 'shader'. */
void
lower_ubo_reference(struct gl_linked_shader *shader, bool clamp_block_indices)
{
   lower_ubo_reference_visitor v(shader, clamp_block_indices);

   /* Loop over the instructions lowering references until no more progress
    * is made, because taking a deref of a UBO array using a UBO dereference
    * as the index will produce a collection of instructions all of which
    * have cloned UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}