glsl: move to compiler/
[mesa.git] / src / compiler / glsl / lower_ubo_reference.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_ubo_reference.cpp
26 *
27 * IR lower pass to replace dereferences of variables in a uniform
28 * buffer object with usage of ir_binop_ubo_load expressions, each of
29 * which can read data up to the size of a vec4.
30 *
31 * This relieves drivers of the responsibility to deal with tricky UBO
32 * layout issues like std140 structures and row_major matrices on
33 * their own.
34 */
35
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40
41 using namespace ir_builder;
42
43 namespace {
/**
 * Visitor that rewrites dereferences of variables in UBO/SSBO interface
 * blocks into explicit buffer accesses: ir_binop_ubo_load expressions for
 * UBOs, and internal intrinsic calls (load/store/atomic) for SSBOs, each
 * addressed by a block index and a byte offset.
 */
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_shader *shader)
   : shader(shader)
   {
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /* Resolve the interface block (setting this->uniform_block and
    * this->ubo_var) and compute the byte offset for a dereference of
    * \c var, so a load or store can subsequently be emitted.
    */
   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                unsigned packing);
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* Kind of buffer access currently being lowered; set before calling
    * setup_for_load_or_store()/emit_access() and consulted by
    * insert_buffer_access() to pick the right load/store form.
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           unsigned packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   /* Shader whose IR is being lowered. */
   struct gl_shader *shader;
   /* Block member being accessed by the current lowering operation. */
   struct gl_uniform_buffer_variable *ubo_var;
   /* Block-index rvalue for the current access (may be non-constant). */
   ir_rvalue *uniform_block;
   /* Set when any IR was rewritten; the caller loops until it stays false. */
   bool progress;
};
110
111 /**
112 * Determine the name of the interface block field
113 *
114 * This is the name of the specific member as it would appear in the
115 * \c gl_uniform_buffer_variable::Name field in the shader's
116 * \c UniformBlocks array.
117 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: walk the (possibly trimmed) dereference chain starting
    * at 'd', rewriting array subscripts into the block name.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         /* Only an instance-name'd block array keeps the rewritten name;
          * otherwise the plain base name identifies the block and any
          * accumulated non-constant index is discarded.
          */
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            /* Non-constant subscript: emit "[0]" in the name and fold the
             * runtime index (scaled for arrays-of-arrays) into
             * *nonconst_block_index instead.
             */
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}
241
/**
 * Resolve the interface block referenced by \c deref and compute the byte
 * offset of the dereferenced member.
 *
 * On return this->uniform_block holds the block-index rvalue (constant, or
 * constant plus a runtime index for non-constant block-array subscripts),
 * this->ubo_var points at the accessed member's layout info, *const_offset
 * holds the member's base byte offset, and *offset/*row_major/
 * *matrix_columns are filled in by setup_buffer_access().
 */
void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     unsigned packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Locate the block by interface name; SSBO accesses search the
    * shader-storage block list, everything else the uniform block list.
    */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->NumShaderStorageBlocks;
      blocks = shader->ShaderStorageBlocks;
   } else {
      num_blocks = shader->NumUniformBlocks;
      blocks = shader->UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         this->ubo_var = var->is_interface_instance()
            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];

         break;
      }
   }

   assert(this->uniform_block);

   *const_offset = ubo_var->Offset;

   setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
                       matrix_columns, packing);
}
294
/**
 * Replace an rvalue that dereferences a UBO/SSBO variable with a read into
 * a fresh temporary, then point the rvalue at that temporary.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Snapshot the (possibly non-constant) offset into a temporary so the
    * per-component loads emitted by emit_access() can share it.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
352
353 ir_expression *
354 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
355 const glsl_type *type,
356 ir_rvalue *offset)
357 {
358 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
359 return new(mem_ctx)
360 ir_expression(ir_binop_ubo_load,
361 type,
362 block_ref,
363 offset);
364
365 }
366
367 static bool
368 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
369 {
370 return state->ARB_shader_storage_buffer_object_enable;
371 }
372
/**
 * Build a call to the internal __intrinsic_store_ssbo that writes
 * \c deref's value, masked by \c write_mask, to the current uniform_block
 * at \c offset.  A fresh signature is constructed for each call.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value" , ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   /* Actual arguments: block index, byte offset, value, write mask. */
   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
413
/**
 * Build a call to the internal __intrinsic_load_ssbo that reads \c type's
 * worth of data from the current uniform_block at \c offset.  The result
 * is returned through a fresh "ssbo_load_result" temporary, which is
 * inserted before base_ir.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->is_intrinsic = true;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   /* Temporary that receives the loaded value. */
   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
450
/**
 * Emit one buffer access (up to a vec4) for emit_access(), dispatching on
 * the current buffer_access_type.
 *
 * \param deref    temporary being read into (loads) or written from (stores)
 * \param offset   byte offset of this piece of the access
 * \param mask     write mask for the assignment/store
 * \param channel  for stores of single components, the channel to store;
 *                 negative means store the whole value under \c mask
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores go after base_ir so they observe the value the current
       * assignment writes into the temporary.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}
487
/**
 * Emit the SSBO stores that copy \c write_var (the temporary now holding
 * the assigned value) back to the buffer location named by \c deref/\c var.
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;

   this->buffer_access_type = ssbo_store_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}
525
/* Lower any ssbo_unsized_array_length sub-expressions first, then visit
 * the expression's rvalues as usual.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}
532
/**
 * If \c expr is an ir_unop_ssbo_unsized_array_length applied to an unsized
 * array member of a shader storage block, return the lowered expression
 * that computes its length; otherwise return NULL.
 */
ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}
554
/**
 * Replace any direct operands of \c ir that are unsized-array-length
 * expressions on SSBO members with their lowered equivalents.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      /* The lowered expression fully replaces the original operand. */
      delete expr;
      ir->operands[i] = temp;
   }
}
579
580 void
581 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
582 {
583 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
584 return;
585
586 ir_expression *expr = (ir_expression *) ir->rhs;
587 ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
588 if (!temp)
589 return;
590
591 delete expr;
592 ir->rhs = temp;
593 return;
594 }
595
596 ir_expression *
597 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
598 {
599 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
600 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
601 glsl_type::int_type,
602 block_ref);
603 }
604
605 unsigned
606 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
607 unsigned packing)
608 {
609 unsigned array_stride = 0;
610
611 switch (deref->ir_type) {
612 case ir_type_dereference_variable:
613 {
614 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
615 const struct glsl_type *unsized_array_type = NULL;
616 /* An unsized array can be sized by other lowering passes, so pick
617 * the first field of the array which has the data type of the unsized
618 * array.
619 */
620 unsized_array_type = deref_var->var->type->fields.array;
621
622 /* Whether or not the field is row-major (because it might be a
623 * bvec2 or something) does not affect the array itself. We need
624 * to know whether an array element in its entirety is row-major.
625 */
626 const bool array_row_major =
627 is_dereferenced_thing_row_major(deref_var);
628
629 if (packing == GLSL_INTERFACE_PACKING_STD430) {
630 array_stride = unsized_array_type->std430_array_stride(array_row_major);
631 } else {
632 array_stride = unsized_array_type->std140_size(array_row_major);
633 array_stride = glsl_align(array_stride, 16);
634 }
635 break;
636 }
637 case ir_type_dereference_record:
638 {
639 ir_dereference_record *deref_record = (ir_dereference_record *) deref;
640 ir_dereference *interface_deref =
641 deref_record->record->as_dereference();
642 assert(interface_deref != NULL);
643 const struct glsl_type *interface_type = interface_deref->type;
644 unsigned record_length = interface_type->length;
645 /* Unsized array is always the last element of the interface */
646 const struct glsl_type *unsized_array_type =
647 interface_type->fields.structure[record_length - 1].type->fields.array;
648
649 const bool array_row_major =
650 is_dereferenced_thing_row_major(deref_record);
651
652 if (packing == GLSL_INTERFACE_PACKING_STD430) {
653 array_stride = unsized_array_type->std430_array_stride(array_row_major);
654 } else {
655 array_stride = unsized_array_type->std140_size(array_row_major);
656 array_stride = glsl_align(array_stride, 16);
657 }
658 break;
659 }
660 default:
661 unreachable("Unsupported dereference type");
662 }
663 return array_stride;
664 }
665
/**
 * Build the expression that computes the length of an unsized SSBO array:
 * max((buffer_size - array_offset) / array_stride, 0).
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   unsigned packing = var->get_interface_type()->interface_packing;
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   /* get_buffer_size yields an int, so convert the uint offset to match. */
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
710
/**
 * If \c ir assigns to an SSBO variable, redirect the assignment into a
 * fresh temporary and emit the stores that copy the temporary to memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}
745
746 static bool
747 is_buffer_backed_variable(ir_variable *var)
748 {
749 return var->is_in_buffer_block() ||
750 var->data.mode == ir_var_shader_shared;
751 }
752
/**
 * Split a whole-array copy whose RHS is buffer-backed into one assignment
 * per element, to limit how long the emitted loads must stay live.
 * Returns true (and removes \c ir) when the copy was split.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one element-wise assignment per array index; these will be
    * revisited and lowered individually.
    */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
802
/**
 * Split a whole-struct copy whose RHS is buffer-backed into one assignment
 * per field, for the same register-pressure reason as the array case.
 * Returns true (and removes \c ir) when the copy was split.
 */
bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   /* Emit one field-wise assignment per struct member. */
   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}
850
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   /* Lower .length() on the RHS, then SSBO stores, then remaining rvalues. */
   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}
871
872 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
873 * access to the buffer variable in the first parameter by an offset
874 * and block index. This involves creating the new internal intrinsic
875 * (i.e. the new function signature).
876 */
877 ir_call *
878 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
879 {
880 /* SSBO atomics usually have 2 parameters, the buffer variable and an
881 * integer argument. The exception is CompSwap, that has an additional
882 * integer parameter.
883 */
884 int param_count = ir->actual_parameters.length();
885 assert(param_count == 2 || param_count == 3);
886
887 /* First argument must be a scalar integer buffer variable */
888 exec_node *param = ir->actual_parameters.get_head();
889 ir_instruction *inst = (ir_instruction *) param;
890 assert(inst->ir_type == ir_type_dereference_variable ||
891 inst->ir_type == ir_type_dereference_array ||
892 inst->ir_type == ir_type_dereference_record ||
893 inst->ir_type == ir_type_swizzle);
894
895 ir_rvalue *deref = (ir_rvalue *) inst;
896 assert(deref->type->is_scalar() && deref->type->is_integer());
897
898 ir_variable *var = deref->variable_referenced();
899 assert(var);
900
901 /* Compute the offset to the start if the dereference and the
902 * block index
903 */
904 void *mem_ctx = ralloc_parent(shader->ir);
905
906 ir_rvalue *offset = NULL;
907 unsigned const_offset;
908 bool row_major;
909 int matrix_columns;
910 unsigned packing = var->get_interface_type()->interface_packing;
911
912 this->buffer_access_type = ssbo_atomic_access;
913
914 setup_for_load_or_store(mem_ctx, var, deref,
915 &offset, &const_offset,
916 &row_major, &matrix_columns,
917 packing);
918 assert(offset);
919 assert(!row_major);
920 assert(matrix_columns == 1);
921
922 ir_rvalue *deref_offset =
923 add(offset, new(mem_ctx) ir_constant(const_offset));
924 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
925
926 /* Create the new internal function signature that will take a block
927 * index and offset instead of a buffer variable
928 */
929 exec_list sig_params;
930 ir_variable *sig_param = new(mem_ctx)
931 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
932 sig_params.push_tail(sig_param);
933
934 sig_param = new(mem_ctx)
935 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
936 sig_params.push_tail(sig_param);
937
938 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
939 glsl_type::int_type : glsl_type::uint_type;
940 sig_param = new(mem_ctx)
941 ir_variable(type, "data1", ir_var_function_in);
942 sig_params.push_tail(sig_param);
943
944 if (param_count == 3) {
945 sig_param = new(mem_ctx)
946 ir_variable(type, "data2", ir_var_function_in);
947 sig_params.push_tail(sig_param);
948 }
949
950 ir_function_signature *sig =
951 new(mem_ctx) ir_function_signature(deref->type,
952 shader_storage_buffer_object);
953 assert(sig);
954 sig->replace_parameters(&sig_params);
955 sig->is_intrinsic = true;
956
957 char func_name[64];
958 sprintf(func_name, "%s_ssbo", ir->callee_name());
959 ir_function *f = new(mem_ctx) ir_function(func_name);
960 f->add_signature(sig);
961
962 /* Now, create the call to the internal intrinsic */
963 exec_list call_params;
964 call_params.push_tail(block_index);
965 call_params.push_tail(deref_offset);
966 param = ir->actual_parameters.get_head()->get_next();
967 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
968 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
969 if (param_count == 3) {
970 param = param->get_next();
971 param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
972 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
973 }
974 ir_dereference_variable *return_deref =
975 ir->return_deref->clone(mem_ctx, NULL);
976 return new(mem_ctx) ir_call(sig, return_deref, &call_params);
977 }
978
979 ir_call *
980 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
981 {
982 exec_list& params = ir->actual_parameters;
983
984 if (params.length() < 2 || params.length() > 3)
985 return ir;
986
987 ir_rvalue *rvalue =
988 ((ir_instruction *) params.get_head())->as_rvalue();
989 if (!rvalue)
990 return ir;
991
992 ir_variable *var = rvalue->variable_referenced();
993 if (!var || !var->is_in_shader_storage_block())
994 return ir;
995
996 const char *callee = ir->callee_name();
997 if (!strcmp("__intrinsic_atomic_add", callee) ||
998 !strcmp("__intrinsic_atomic_min", callee) ||
999 !strcmp("__intrinsic_atomic_max", callee) ||
1000 !strcmp("__intrinsic_atomic_and", callee) ||
1001 !strcmp("__intrinsic_atomic_or", callee) ||
1002 !strcmp("__intrinsic_atomic_xor", callee) ||
1003 !strcmp("__intrinsic_atomic_exchange", callee) ||
1004 !strcmp("__intrinsic_atomic_comp_swap", callee)) {
1005 return lower_ssbo_atomic_intrinsic(ir);
1006 }
1007
1008 return ir;
1009 }
1010
1011
/* Replace SSBO atomic intrinsic calls in place; otherwise lower the call's
 * rvalue arguments normally.
 */
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}
1024
1025
1026 } /* unnamed namespace */
1027
/**
 * Entry point: run the UBO/SSBO reference lowering pass over \c shader's IR
 * until no more rewrites are made.
 */
void
lower_ubo_reference(struct gl_shader *shader)
{
   lower_ubo_reference_visitor v(shader);

   /* Loop over the instructions lowering references, because taking a
    * deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}