04b82d95c316e6a9bf228629b44b2e29ccd6be56
[mesa.git] / src / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
43 namespace {
/**
 * Visitor that rewrites dereferences of UBO/SSBO-backed variables into
 * explicit block-index + byte-offset accesses (ir_binop_ubo_load for UBOs,
 * __intrinsic_load_ssbo/__intrinsic_store_ssbo calls for SSBOs).
 */
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_shader *shader)
   : shader(shader)
   {
   }

   /** Replace a read of a buffer-block variable with lowered load code. */
   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /**
    * Resolve which interface block \p deref refers to and compute the
    * offset of the dereferenced thing within it. Fills in this->uniform_block
    * and this->ubo_var as side effects.
    */
   void setup_for_load_or_store(ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                unsigned packing);
   /** Build an ir_binop_ubo_load expression reading \p type at \p offset. */
   ir_expression *ubo_load(const struct glsl_type *type,
                           ir_rvalue *offset);
   /** Build a call to __intrinsic_load_ssbo returning \p type. */
   ir_call *ssbo_load(const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   /** Emit the stores that copy \p write_var back into buffer memory. */
   void write_to_memory(ir_dereference *deref,
                        ir_variable *var,
                        ir_variable *write_var,
                        unsigned write_mask);
   /** Build a call to __intrinsic_store_ssbo writing \p deref at \p offset. */
   ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* Which kind of access the current lowering operation is performing;
    * consulted by setup_for_load_or_store() (to pick the UBO vs. SSBO block
    * list) and insert_buffer_access() (to pick the emitted IR form).
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   /** Callback from emit_access(): emit one load or store of up to a vec4. */
   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size();

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           unsigned packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   void *mem_ctx;                              /* ralloc context for new IR */
   struct gl_shader *shader;                   /* shader being lowered */
   struct gl_uniform_buffer_variable *ubo_var; /* variable within the block */
   ir_rvalue *uniform_block;                   /* computed block index */
   bool progress;                              /* set when any IR was rewritten */
};
112
113 /**
114 * Determine the name of the interface block field
115 *
116 * This is the name of the specific member as it would appear in the
117 * \c gl_uniform_buffer_variable::Name field in the shader's
118 * \c UniformBlocks array.
119 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: walk from the deepest dereference that can still belong to
    * the block itself back to the variable, building up the block name with
    * any array subscripts appended.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         /* Only an array-of-instances interface keeps the subscripted name;
          * otherwise the plain base name identifies the block and any
          * accumulated non-constant index is discarded.
          */
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            /* Dynamic subscript: fold it into *nonconst_block_index and use
             * "[0]" in the name, matching how the linker names such blocks.
             */
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               /* Scale an outer-dimension index by the size of the inner
                * dimensions so indices from all dimensions can be summed.
                */
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}
243
/**
 * Resolve the interface block referenced by \p deref and compute the offset
 * of the dereferenced value within it.
 *
 * On return this->uniform_block holds the block index (possibly a run-time
 * expression for dynamically-indexed block arrays) and this->ubo_var points
 * at the matched member's layout record. The choice between the shader's
 * UBO and SSBO block lists is driven by this->buffer_access_type, which the
 * caller must set beforehand.
 */
void
lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     unsigned packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Locate the block by interface name */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->NumShaderStorageBlocks;
      blocks = shader->ShaderStorageBlocks;
   } else {
      num_blocks = shader->NumUniformBlocks;
      blocks = shader->UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* A dynamic subscript on a block array is added on top of the
          * constant index of the first matching block.
          */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         /* Interface instances have all their members relative to Uniforms[0];
          * otherwise data.location selects the member.
          */
         this->ubo_var = var->is_interface_instance()
            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];

         break;
      }
   }

   assert(this->uniform_block);

   *const_offset = ubo_var->Offset;

   setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
                       matrix_columns, packing);
}
295
/**
 * Replace a read of a buffer-block variable with IR that loads the value
 * through explicit block-index/offset accesses into a fresh temporary, and
 * substitute a dereference of that temporary for the original rvalue.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   /* SSBO-backed variables get intrinsic-call loads; UBO-backed ones get
    * ir_binop_ubo_load expressions (see insert_buffer_access()).
    */
   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load.
    */
   setup_for_load_or_store(var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Evaluate the (possibly dynamic) offset once into a temporary so the
    * per-vec4 loads emitted below don't recompute it.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
353
354 ir_expression *
355 lower_ubo_reference_visitor::ubo_load(const glsl_type *type,
356 ir_rvalue *offset)
357 {
358 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
359 return new(mem_ctx)
360 ir_expression(ir_binop_ubo_load,
361 type,
362 block_ref,
363 offset);
364
365 }
366
367 static bool
368 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
369 {
370 return state->ARB_shader_storage_buffer_object_enable;
371 }
372
373 ir_call *
374 lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
375 ir_rvalue *offset,
376 unsigned write_mask)
377 {
378 exec_list sig_params;
379
380 ir_variable *block_ref = new(mem_ctx)
381 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
382 sig_params.push_tail(block_ref);
383
384 ir_variable *offset_ref = new(mem_ctx)
385 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
386 sig_params.push_tail(offset_ref);
387
388 ir_variable *val_ref = new(mem_ctx)
389 ir_variable(deref->type, "value" , ir_var_function_in);
390 sig_params.push_tail(val_ref);
391
392 ir_variable *writemask_ref = new(mem_ctx)
393 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
394 sig_params.push_tail(writemask_ref);
395
396 ir_function_signature *sig = new(mem_ctx)
397 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
398 assert(sig);
399 sig->replace_parameters(&sig_params);
400 sig->is_intrinsic = true;
401
402 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
403 f->add_signature(sig);
404
405 exec_list call_params;
406 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
407 call_params.push_tail(offset->clone(mem_ctx, NULL));
408 call_params.push_tail(deref->clone(mem_ctx, NULL));
409 call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
410 return new(mem_ctx) ir_call(sig, NULL, &call_params);
411 }
412
413 ir_call *
414 lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
415 ir_rvalue *offset)
416 {
417 exec_list sig_params;
418
419 ir_variable *block_ref = new(mem_ctx)
420 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
421 sig_params.push_tail(block_ref);
422
423 ir_variable *offset_ref = new(mem_ctx)
424 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
425 sig_params.push_tail(offset_ref);
426
427 ir_function_signature *sig =
428 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
429 assert(sig);
430 sig->replace_parameters(&sig_params);
431 sig->is_intrinsic = true;
432
433 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
434 f->add_signature(sig);
435
436 ir_variable *result = new(mem_ctx)
437 ir_variable(type, "ssbo_load_result", ir_var_temporary);
438 base_ir->insert_before(result);
439 ir_dereference_variable *deref_result = new(mem_ctx)
440 ir_dereference_variable(result);
441
442 exec_list call_params;
443 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
444 call_params.push_tail(offset->clone(mem_ctx, NULL));
445
446 return new(mem_ctx) ir_call(sig, deref_result, &call_params);
447 }
448
/**
 * Callback invoked by emit_access() for each chunk (up to a vec4) of the
 * value being moved; emits the matching load or store IR depending on
 * this->buffer_access_type.
 *
 * \param channel  for stores, >= 0 selects a single component to write
 *                 (used for row-major matrix columns); -1 writes per \p mask.
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      /* The intrinsic call must be emitted before the assignment that
       * consumes its return temporary.
       */
      ir_call *load_ssbo = ssbo_load(type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores go after base_ir: the rewritten assignment to the temporary
       * must execute before its value is written back to memory.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
484
/**
 * Emit the stores that copy the temporary \p write_var back into the SSBO
 * memory addressed by \p deref (the original LHS), honoring \p write_mask.
 */
void
lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type = ssbo_store_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}
521
522 ir_visitor_status
523 lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
524 {
525 check_ssbo_unsized_array_length_expression(ir);
526 return rvalue_visit(ir);
527 }
528
/**
 * If \p expr is a .length() on an unsized array inside a shader storage
 * block, return the replacement expression computing the length from the
 * buffer size; otherwise return NULL.
 */
ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   /* The operand must actually be an unsized array dereference. */
   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   /* Only SSBO-backed arrays have a run-time-queryable length. */
   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}
550
/**
 * Replace any direct ssbo_unsized_array_length operands of \p ir with the
 * computed-length expression. The top-level expression itself is left alone.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      /* Swap the old operand for the lowered length computation. */
      delete expr;
      ir->operands[i] = temp;
   }
}
575
576 void
577 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
578 {
579 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
580 return;
581
582 ir_expression *expr = (ir_expression *) ir->rhs;
583 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
584 if (!temp)
585 return;
586
587 delete expr;
588 ir->rhs = temp;
589 return;
590 }
591
592 ir_expression *
593 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
594 {
595 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
596 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
597 glsl_type::int_type,
598 block_ref);
599 }
600
601 unsigned
602 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
603 unsigned packing)
604 {
605 unsigned array_stride = 0;
606
607 switch (deref->ir_type) {
608 case ir_type_dereference_variable:
609 {
610 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
611 const struct glsl_type *unsized_array_type = NULL;
612 /* An unsized array can be sized by other lowering passes, so pick
613 * the first field of the array which has the data type of the unsized
614 * array.
615 */
616 unsized_array_type = deref_var->var->type->fields.array;
617
618 /* Whether or not the field is row-major (because it might be a
619 * bvec2 or something) does not affect the array itself. We need
620 * to know whether an array element in its entirety is row-major.
621 */
622 const bool array_row_major =
623 is_dereferenced_thing_row_major(deref_var);
624
625 if (packing == GLSL_INTERFACE_PACKING_STD430) {
626 array_stride = unsized_array_type->std430_array_stride(array_row_major);
627 } else {
628 array_stride = unsized_array_type->std140_size(array_row_major);
629 array_stride = glsl_align(array_stride, 16);
630 }
631 break;
632 }
633 case ir_type_dereference_record:
634 {
635 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
636 ir_dereference *interface_deref =
637 deref_record->record->as_dereference();
638 assert(interface_deref != NULL);
639 const struct glsl_type *interface_type = interface_deref->type;
640 unsigned record_length = interface_type->length;
641 /* Unsized array is always the last element of the interface */
642 const struct glsl_type *unsized_array_type =
643 interface_type->fields.structure[record_length - 1].type->fields.array;
644
645 const bool array_row_major =
646 is_dereferenced_thing_row_major(deref_record);
647
648 if (packing == GLSL_INTERFACE_PACKING_STD430) {
649 array_stride = unsized_array_type->std430_array_stride(array_row_major);
650 } else {
651 array_stride = unsized_array_type->std140_size(array_row_major);
652 array_stride = glsl_align(array_stride, 16);
653 }
654 break;
655 }
656 default:
657 unreachable("Unsupported dereference type");
658 }
659 return array_stride;
660 }
661
/**
 * Build the expression implementing .length() on an unsized SSBO array:
 *
 *    max((buffer_size - offset_of_array) / array_stride, 0)
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size();

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* get_buffer_size returns a signed int, so the arithmetic is done in
    * signed integers after converting the unsigned offset.
    */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   /* Clamp to zero so a buffer smaller than the array's offset doesn't
    * yield a negative length.
    */
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
706
/**
 * If \p ir assigns to a buffer-block variable, redirect the assignment into
 * a fresh temporary and emit the stores that copy the temporary back into
 * buffer memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(deref, var, write_var, ir->write_mask);
   progress = true;
}
741
742 static bool
743 is_buffer_backed_variable(ir_variable *var)
744 {
745 return var->is_in_buffer_block() ||
746 var->data.mode == ir_var_shader_shared;
747 }
748
/**
 * Detect an array = array assignment whose RHS is buffer-backed and split
 * it into per-element copies to reduce register pressure. Returns true if
 * the assignment was split (and removed).
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs[i] = rhs[i] assignment per element; each will be lowered
    * individually when the visitor reaches it.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
798
/**
 * Detect a struct = struct assignment whose RHS is buffer-backed and split
 * it into per-field copies to reduce register pressure. Returns true if the
 * assignment was split (and removed).
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   mem_ctx = ralloc_parent(shader->ir);

   /* Emit one lhs.field = rhs.field assignment per member; each will be
    * lowered individually when the visitor reaches it.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}
846
/**
 * Assignment hook: split large buffer-backed copies first, then lower
 * unsized-array .length() on the RHS, SSBO stores on the LHS, and finally
 * the assignment's remaining rvalues.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}
867
868 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
869 * access to the buffer variable in the first parameter by an offset
870 * and block index. This involves creating the new internal intrinsic
871 * (i.e. the new function signature).
872 */
873 ir_call *
874 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
875 {
876 /* SSBO atomics usually have 2 parameters, the buffer variable and an
877 * integer argument. The exception is CompSwap, that has an additional
878 * integer parameter.
879 */
880 int param_count = ir->actual_parameters.length();
881 assert(param_count == 2 || param_count == 3);
882
883 /* First argument must be a scalar integer buffer variable */
884 exec_node *param = ir->actual_parameters.get_head();
885 ir_instruction *inst = (ir_instruction *) param;
886 assert(inst->ir_type == ir_type_dereference_variable ||
887 inst->ir_type == ir_type_dereference_array ||
888 inst->ir_type == ir_type_dereference_record ||
889 inst->ir_type == ir_type_swizzle);
890
891 ir_rvalue *deref = (ir_rvalue *) inst;
892 assert(deref->type->is_scalar() && deref->type->is_integer());
893
894 ir_variable *var = deref->variable_referenced();
895 assert(var);
896
897 /* Compute the offset to the start if the dereference and the
898 * block index
899 */
900 mem_ctx = ralloc_parent(shader->ir);
901
902 ir_rvalue *offset = NULL;
903 unsigned const_offset;
904 bool row_major;
905 int matrix_columns;
906 unsigned packing = var->get_interface_type()->interface_packing;
907
908 this->buffer_access_type = ssbo_atomic_access;
909
910 setup_for_load_or_store(var, deref,
911 &offset, &const_offset,
912 &row_major, &matrix_columns,
913 packing);
914 assert(offset);
915 assert(!row_major);
916 assert(matrix_columns == 1);
917
918 ir_rvalue *deref_offset =
919 add(offset, new(mem_ctx) ir_constant(const_offset));
920 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
921
922 /* Create the new internal function signature that will take a block
923 * index and offset instead of a buffer variable
924 */
925 exec_list sig_params;
926 ir_variable *sig_param = new(mem_ctx)
927 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
928 sig_params.push_tail(sig_param);
929
930 sig_param = new(mem_ctx)
931 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
932 sig_params.push_tail(sig_param);
933
934 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
935 glsl_type::int_type : glsl_type::uint_type;
936 sig_param = new(mem_ctx)
937 ir_variable(type, "data1", ir_var_function_in);
938 sig_params.push_tail(sig_param);
939
940 if (param_count == 3) {
941 sig_param = new(mem_ctx)
942 ir_variable(type, "data2", ir_var_function_in);
943 sig_params.push_tail(sig_param);
944 }
945
946 ir_function_signature *sig =
947 new(mem_ctx) ir_function_signature(deref->type,
948 shader_storage_buffer_object);
949 assert(sig);
950 sig->replace_parameters(&sig_params);
951 sig->is_intrinsic = true;
952
953 char func_name[64];
954 sprintf(func_name, "%s_internal", ir->callee_name());
955 ir_function *f = new(mem_ctx) ir_function(func_name);
956 f->add_signature(sig);
957
958 /* Now, create the call to the internal intrinsic */
959 exec_list call_params;
960 call_params.push_tail(block_index);
961 call_params.push_tail(deref_offset);
962 param = ir->actual_parameters.get_head()->get_next();
963 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
964 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
965 if (param_count == 3) {
966 param = param->get_next();
967 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
968 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
969 }
970 ir_dereference_variable *return_deref =
971 ir->return_deref->clone(mem_ctx, NULL);
972 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
973 }
974
975 ir_call *
976 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
977 {
978 const char *callee = ir->callee_name();
979 if (!strcmp("__intrinsic_ssbo_atomic_add", callee) ||
980 !strcmp("__intrinsic_ssbo_atomic_min", callee) ||
981 !strcmp("__intrinsic_ssbo_atomic_max", callee) ||
982 !strcmp("__intrinsic_ssbo_atomic_and", callee) ||
983 !strcmp("__intrinsic_ssbo_atomic_or", callee) ||
984 !strcmp("__intrinsic_ssbo_atomic_xor", callee) ||
985 !strcmp("__intrinsic_ssbo_atomic_exchange", callee) ||
986 !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) {
987 return lower_ssbo_atomic_intrinsic(ir);
988 }
989
990 return ir;
991 }
992
993
994 ir_visitor_status
995 lower_ubo_reference_visitor::visit_enter(ir_call *ir)
996 {
997 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
998 if (new_ir != ir) {
999 progress = true;
1000 base_ir->replace_with(new_ir);
1001 return visit_continue_with_parent;
1002 }
1003
1004 return rvalue_visit(ir);
1005 }
1006
1007
1008 } /* unnamed namespace */
1009
/**
 * Entry point: lower all UBO/SSBO variable references in \p shader's IR,
 * iterating until no further rewrites are made.
 */
void
lower_ubo_reference(struct gl_shader *shader)
{
   lower_ubo_reference_visitor v(shader);

   /* Loop over the instructions lowering references, because we take
    * a deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}