glsl: teach lower_ubo_reference about samplers inside structures
[mesa.git] src/compiler/glsl/lower_ubo_reference.cpp
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_ubo_reference.cpp
 *
 * IR lowering pass to replace dereferences of variables in a uniform
 * buffer object with usage of ir_binop_ubo_load expressions, each of
 * which can read data up to the size of a vec4.
 *
 * This relieves drivers of the responsibility to deal with tricky UBO
 * layout issues like std140 structures and row_major matrices on
 * their own.
 */
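
/* For example (an illustrative sketch, not literal compiler output), a
 * dereference such as:
 *
 *    uniform U { vec4 a; vec4 b; } u;
 *    ... u.b ...
 *
 * is rewritten to load from the block at the member's std140 byte offset,
 * conceptually:
 *
 *    vec4 ubo_load_temp = ubo_load(block_index_of_U, 16);
 *    ... ubo_load_temp ...
 */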

#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "main/macros.h"
#include "glsl_parser_extras.h"

using namespace ir_builder;

namespace {
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_linked_shader *shader,
                               bool clamp_block_indices)
   : shader(shader), clamp_block_indices(clamp_block_indices),
     struct_field(NULL), variable(NULL)
   {
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                enum glsl_interface_packing packing);
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           enum glsl_interface_packing packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);
   ir_visitor_status visit_enter(ir_texture *ir);

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   ir_rvalue *uniform_block;
   bool progress;
};

/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 */
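/* For example (illustrative): given
 *
 *    uniform Blocks { vec4 v; } blocks[4];
 *
 * a dereference of blocks[i].v yields the name "Blocks[0]" with
 * *nonconst_block_index set to i, while a constant subscript such as
 * blocks[2].v yields "Blocks[2]" and leaves *nonconst_block_index NULL.
 */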
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}

static ir_rvalue *
clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
{
   assert(type->is_array());

   const unsigned array_size = type->arrays_of_arrays_size();

   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
   max_index->type = index->type;

   ir_constant *zero = new(mem_ctx) ir_constant(0);
   zero->type = index->type;

   if (index->type->base_type == GLSL_TYPE_INT)
      index = max2(index, zero);
   index = min2(index, max_index);

   return index;
}
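
/* For example (illustrative): with clamp_block_indices enabled and a
 * 4-element block array like blocks[4] above, a dynamic index expression i
 * becomes min(max(i, 0), 3); the max() against zero is only applied when
 * the index is a signed integer.
 */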

void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     enum glsl_interface_packing packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   if (nonconst_block_index && clamp_block_indices) {
      nonconst_block_index =
         clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
   }

   /* Locate the block by interface name */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->Program->info.num_ssbos;
      blocks = shader->Program->sh.ShaderStorageBlocks;
   } else {
      num_blocks = shader->Program->info.num_ubos;
      blocks = shader->Program->sh.UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         if (var->is_interface_instance()) {
            *const_offset = 0;
         } else {
            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
         }

         break;
      }
   }

   assert(this->uniform_block);

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
}
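
/* Continuing the example above (illustrative): for blocks[i].v this locates
 * the gl_uniform_block named "Blocks[0]", so this->uniform_block becomes the
 * expression (i + index_of_"Blocks[0]"), and the byte offset of the member v
 * within the block is then computed by setup_buffer_access().
 */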

void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the load.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}
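
/* The rewrite performed above looks roughly like this in IR terms
 * (illustrative sketch for a vec4 member):
 *
 *    (declare (temporary) vec4 ubo_load_temp)
 *    (declare (temporary) uint ubo_load_temp_offset)
 *    (assign (x) (var_ref ubo_load_temp_offset) (<offset expr>))
 *    (assign (xyzw) (var_ref ubo_load_temp)
 *            (expression vec4 ubo_load (<block index>) (<offset>)))
 *
 * and the original rvalue is replaced with (var_ref ubo_load_temp).
 */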

ir_expression *
lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
                                      const glsl_type *type,
                                      ir_rvalue *offset)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx)
      ir_expression(ir_binop_ubo_load,
                    type,
                    block_ref,
                    offset);
}

static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
   return state->has_shader_storage_buffer_objects();
}

uint32_t
lower_ubo_reference_visitor::ssbo_access_params()
{
   assert(variable);

   if (variable->is_interface_instance()) {
      assert(struct_field);

      return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) |
              (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) |
              (struct_field->memory_volatile ? ACCESS_VOLATILE : 0));
   } else {
      return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) |
              (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) |
              (variable->data.memory_volatile ? ACCESS_VOLATILE : 0));
   }
}
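
/* For example (illustrative): a buffer variable declared as
 *
 *    buffer B { coherent restrict vec4 data[]; };
 *
 * yields ACCESS_COHERENT | ACCESS_RESTRICT here, which is passed as the
 * trailing "access" argument of the SSBO load/store intrinsics below.
 */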

ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value", ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access", ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}

ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access", ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_load;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
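
/* Both helpers above synthesize calls to internal intrinsics of the form
 * (illustrative):
 *
 *    __intrinsic_store_ssbo(block_index, offset, value, write_mask, access)
 *    ssbo_load_result = __intrinsic_load_ssbo(block_index, offset, access)
 *
 * which drivers later lower to actual memory operations.
 */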

void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}

void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}

ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}

void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      delete expr;
      ir->operands[i] = temp;
   }
}

void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
{
   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
      return;

   ir_expression *expr = (ir_expression *) ir->rhs;
   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
   if (!temp)
      return;

   delete expr;
   ir->rhs = temp;
}

ir_expression *
lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
                                     glsl_type::int_type,
                                     block_ref);
}

unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            enum glsl_interface_packing packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* The unsized array is always the last member of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}

ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *    max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}
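
/* Worked example (illustrative): for an std430 block
 *
 *    buffer B { int count; vec4 data[]; };
 *
 * data starts at byte offset 16 and has a 16-byte array stride, so for a
 * 1040-byte buffer binding data.length() computes
 * max((1040 - 16) / 16, 0) = 64.
 */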

void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}

static bool
is_buffer_backed_variable(ir_variable *var)
{
   return var->is_in_buffer_block() ||
          var->data.mode == ir_var_shader_shared;
}

bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   ir->remove();
   progress = true;
   return true;
}
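
/* For example (illustrative): given "buffer B { vec4 src[4]; };" and a
 * local "vec4 dst[4]", the copy "dst = src;" becomes four element
 * assignments:
 *
 *    dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];
 *
 * so each SSBO load can be consumed immediately instead of keeping all
 * four loads live at once.
 */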

bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}

/* Lowers the intrinsic call to a new internal intrinsic that replaces the
 * access to the buffer variable in the first parameter with a block index
 * and an offset. This involves creating the new internal intrinsic
 * (i.e. the new function signature).
 */
ir_call *
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
{
   /* SSBO atomics usually have 2 parameters, the buffer variable and an
    * integer argument. The exception is CompSwap, which has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer buffer variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() && deref->type->is_integer());

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference and the
    * block index
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type = ssbo_atomic_access;
   this->variable = var;

   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);
   assert(!row_major);
   assert(matrix_columns == 1);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));
   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);

   /* Create the new internal function signature that will take a block
    * index and offset instead of a buffer variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref", ir_var_function_in);
   sig_params.push_tail(sig_param);

   sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
      glsl_type::int_type : glsl_type::uint_type;
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);

   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);

   char func_name[64];
   sprintf(func_name, "%s_ssbo", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(block_index);
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}
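
/* For example (illustrative): with "buffer B { uint counter; };" the call
 *
 *    atomicAdd(counter, 1u)
 *
 * is rewritten to something like
 *
 *    __intrinsic_atomic_add_ssbo(block_index, offset_of_counter, 1u)
 *
 * where the "_ssbo" suffix is appended to the original callee name.
 */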

ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   if (params.length() < 2 || params.length() > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_ssbo_atomic_intrinsic(ir);
   }

   return ir;
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
{
   ir_dereference *sampler = ir->sampler;

   if (sampler->ir_type == ir_type_dereference_record) {
      handle_rvalue((ir_rvalue **)&ir->sampler);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}
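
/* This covers the case named in the commit subject: samplers declared
 * inside structures that live in a buffer block, which is only legal with
 * bindless textures, e.g. (illustrative, assuming ARB_bindless_texture):
 *
 *    uniform U { struct { sampler2D tex; } s; };
 *    ... texture(s.tex, uv) ...
 *
 * The sampler dereference itself must be lowered to a UBO load before the
 * texture instruction is processed further.
 */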

} /* unnamed namespace */

void
lower_ubo_reference(struct gl_linked_shader *shader, bool clamp_block_indices)
{
   lower_ubo_reference_visitor v(shader, clamp_block_indices);

   /* Loop over the instructions lowering references, because taking a
    * deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}
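
/* Typical usage (an illustrative sketch; the actual call site and option
 * plumbing live in the GLSL linker, and "compiler_options" here is a
 * hypothetical stand-in for the per-stage driver options):
 *
 *    lower_ubo_reference(prog->_LinkedShaders[stage],
 *                        compiler_options->ClampBlockIndicesToArrayBounds);
 */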
1124 }