nir: Add access flags to deref and SSBO atomics
[mesa.git] / src/compiler/nir/nir_lower_io.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts loads and stores of input/output variables
31 * into the corresponding input/output intrinsics.
32 */
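
/*
 * For example (an illustrative sketch, not exact nir_print output), a
 * simple input read such as
 *
 *    ssa_2 = intrinsic load_deref (ssa_1) ()
 *
 * is rewritten into an offset-based intrinsic along the lines of
 *
 *    ssa_2 = intrinsic load_input (ssa_0) (base, component)
 *
 * where ssa_0 is the offset computed from the deref chain.
 */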
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 struct lower_io_state {
39 void *dead_ctx;
40 nir_builder builder;
41 int (*type_size)(const struct glsl_type *type);
42 nir_variable_mode modes;
43 nir_lower_io_options options;
44 };
45
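/* Map a deref atomic intrinsic to the corresponding SSBO atomic intrinsic. */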
46 static nir_intrinsic_op
47 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
48 {
49 switch (deref_op) {
50 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
51 OP(atomic_exchange)
52 OP(atomic_comp_swap)
53 OP(atomic_add)
54 OP(atomic_imin)
55 OP(atomic_umin)
56 OP(atomic_imax)
57 OP(atomic_umax)
58 OP(atomic_and)
59 OP(atomic_or)
60 OP(atomic_xor)
61 OP(atomic_fadd)
62 OP(atomic_fmin)
63 OP(atomic_fmax)
64 OP(atomic_fcomp_swap)
65 #undef OP
66 default:
67 unreachable("Invalid SSBO atomic");
68 }
69 }
70
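/* Map a deref atomic intrinsic to the corresponding global atomic intrinsic. */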
71 static nir_intrinsic_op
72 global_atomic_for_deref(nir_intrinsic_op deref_op)
73 {
74 switch (deref_op) {
75 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
76 OP(atomic_exchange)
77 OP(atomic_comp_swap)
78 OP(atomic_add)
79 OP(atomic_imin)
80 OP(atomic_umin)
81 OP(atomic_imax)
82 OP(atomic_umax)
83 OP(atomic_and)
84 OP(atomic_or)
85 OP(atomic_xor)
86 OP(atomic_fadd)
87 OP(atomic_fmin)
88 OP(atomic_fmax)
89 OP(atomic_fcomp_swap)
90 #undef OP
91 default:
92 unreachable("Invalid SSBO atomic");
93 }
94 }
95
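/**
 * Assign consecutive driver_location values to the variables in var_list,
 * advancing by type_size(var->type) for each one. UBO and SSBO variables are
 * skipped, and the total size is returned in *size.
 */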
96 void
97 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
98 int (*type_size)(const struct glsl_type *))
99 {
100 unsigned location = 0;
101
102 nir_foreach_variable(var, var_list) {
103 /*
104 * UBOs and SSBOs have their own address spaces, so don't count them
105 * towards the number of global uniforms.
106 */
107 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
108 continue;
109
110 var->data.driver_location = location;
111 location += type_size(var->type);
112 }
113
114 *size = location;
115 }
116
117 /**
118 * Return true if the given variable is a per-vertex input/output array
119 * (such as geometry shader inputs).
120 */
121 bool
122 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
123 {
124 if (var->data.patch || !glsl_type_is_array(var->type))
125 return false;
126
127 if (var->data.mode == nir_var_shader_in)
128 return stage == MESA_SHADER_GEOMETRY ||
129 stage == MESA_SHADER_TESS_CTRL ||
130 stage == MESA_SHADER_TESS_EVAL;
131
132 if (var->data.mode == nir_var_shader_out)
133 return stage == MESA_SHADER_TESS_CTRL;
134
135 return false;
136 }
137
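/*
 * Compute the offset of a variable deref, in type_size() units, by walking
 * its deref chain. If vertex_index is non-NULL, the outermost array index
 * (the per-vertex index) is returned separately through it. For "compact"
 * variables the constant array index is folded into *component and a
 * whole-slot offset is returned.
 */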
138 static nir_ssa_def *
139 get_io_offset(nir_builder *b, nir_deref_instr *deref,
140 nir_ssa_def **vertex_index,
141 int (*type_size)(const struct glsl_type *),
142 unsigned *component)
143 {
144 nir_deref_path path;
145 nir_deref_path_init(&path, deref, NULL);
146
147 assert(path.path[0]->deref_type == nir_deref_type_var);
148 nir_deref_instr **p = &path.path[1];
149
150 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
151 * outermost array index separate. Process the rest normally.
152 */
153 if (vertex_index != NULL) {
154 assert((*p)->deref_type == nir_deref_type_array);
155 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
156 p++;
157 }
158
159 if (path.path[0]->var->data.compact) {
160 assert((*p)->deref_type == nir_deref_type_array);
161 assert(glsl_type_is_scalar((*p)->type));
162
163 /* We always lower indirect dereferences for "compact" array vars. */
164 const unsigned index = nir_src_as_uint((*p)->arr.index);
165 const unsigned total_offset = *component + index;
166 const unsigned slot_offset = total_offset / 4;
167 *component = total_offset % 4;
168 return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
169 }
170
171 /* Just emit code and let constant-folding go to town */
172 nir_ssa_def *offset = nir_imm_int(b, 0);
173
174 for (; *p; p++) {
175 if ((*p)->deref_type == nir_deref_type_array) {
176 unsigned size = type_size((*p)->type);
177
178 nir_ssa_def *mul =
179 nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
180
181 offset = nir_iadd(b, offset, mul);
182 } else if ((*p)->deref_type == nir_deref_type_struct) {
183 /* p starts at path[1], so this is safe */
184 nir_deref_instr *parent = *(p - 1);
185
186 unsigned field_offset = 0;
187 for (unsigned i = 0; i < (*p)->strct.index; i++) {
188 field_offset += type_size(glsl_get_struct_field(parent->type, i));
189 }
190 offset = nir_iadd_imm(b, offset, field_offset);
191 } else {
192 unreachable("Unsupported deref type");
193 }
194 }
195
196 nir_deref_path_finish(&path);
197
198 return offset;
199 }
200
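/*
 * Build the offset-based load intrinsic that replaces a load_deref. For
 * fragment shader inputs, when use_interpolated_input_intrinsics is set and
 * the input is not flat, a barycentric setup is emitted and the load becomes
 * load_interpolated_input. The caller inserts the returned intrinsic.
 */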
201 static nir_intrinsic_instr *
202 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
203 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
204 unsigned component)
205 {
206 const nir_shader *nir = state->builder.shader;
207 nir_variable_mode mode = var->data.mode;
208 nir_ssa_def *barycentric = NULL;
209
210 nir_intrinsic_op op;
211 switch (mode) {
212 case nir_var_shader_in:
213 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
214 nir->options->use_interpolated_input_intrinsics &&
215 var->data.interpolation != INTERP_MODE_FLAT) {
216 assert(vertex_index == NULL);
217
218 nir_intrinsic_op bary_op;
219 if (var->data.sample ||
220 (state->options & nir_lower_io_force_sample_interpolation))
221 bary_op = nir_intrinsic_load_barycentric_sample;
222 else if (var->data.centroid)
223 bary_op = nir_intrinsic_load_barycentric_centroid;
224 else
225 bary_op = nir_intrinsic_load_barycentric_pixel;
226
227 barycentric = nir_load_barycentric(&state->builder, bary_op,
228 var->data.interpolation);
229 op = nir_intrinsic_load_interpolated_input;
230 } else {
231 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
232 nir_intrinsic_load_input;
233 }
234 break;
235 case nir_var_shader_out:
236 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
237 nir_intrinsic_load_output;
238 break;
239 case nir_var_uniform:
240 op = nir_intrinsic_load_uniform;
241 break;
242 case nir_var_mem_shared:
243 op = nir_intrinsic_load_shared;
244 break;
245 default:
246 unreachable("Unknown variable mode");
247 }
248
249 nir_intrinsic_instr *load =
250 nir_intrinsic_instr_create(state->builder.shader, op);
251 load->num_components = intrin->num_components;
252
253 nir_intrinsic_set_base(load, var->data.driver_location);
254 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
255 nir_intrinsic_set_component(load, component);
256
257 if (load->intrinsic == nir_intrinsic_load_uniform)
258 nir_intrinsic_set_range(load, state->type_size(var->type));
259
260 if (vertex_index) {
261 load->src[0] = nir_src_for_ssa(vertex_index);
262 load->src[1] = nir_src_for_ssa(offset);
263 } else if (barycentric) {
264 load->src[0] = nir_src_for_ssa(barycentric);
265 load->src[1] = nir_src_for_ssa(offset);
266 } else {
267 load->src[0] = nir_src_for_ssa(offset);
268 }
269
270 return load;
271 }
272
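/*
 * Build the store_shared, store_output or store_per_vertex_output intrinsic
 * that replaces a store_deref. The caller inserts the returned intrinsic.
 */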
273 static nir_intrinsic_instr *
274 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
275 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
276 unsigned component)
277 {
278 nir_variable_mode mode = var->data.mode;
279
280 nir_intrinsic_op op;
281 if (mode == nir_var_mem_shared) {
282 op = nir_intrinsic_store_shared;
283 } else {
284 assert(mode == nir_var_shader_out);
285 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
286 nir_intrinsic_store_output;
287 }
288
289 nir_intrinsic_instr *store =
290 nir_intrinsic_instr_create(state->builder.shader, op);
291 store->num_components = intrin->num_components;
292
293 nir_src_copy(&store->src[0], &intrin->src[1], store);
294
295 nir_intrinsic_set_base(store, var->data.driver_location);
296
297 if (mode == nir_var_shader_out)
298 nir_intrinsic_set_component(store, component);
299
300 nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
301
302 if (vertex_index)
303 store->src[1] = nir_src_for_ssa(vertex_index);
304
305 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
306
307 return store;
308 }
309
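/*
 * Build the shared_atomic_* intrinsic that replaces a deref_atomic_* on a
 * shared-memory variable; the offset becomes the first source and the data
 * sources are copied over. The caller inserts the returned intrinsic.
 */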
310 static nir_intrinsic_instr *
311 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
312 nir_variable *var, nir_ssa_def *offset)
313 {
314 assert(var->data.mode == nir_var_mem_shared);
315
316 nir_intrinsic_op op;
317 switch (intrin->intrinsic) {
318 #define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
319 OP(atomic_exchange)
320 OP(atomic_comp_swap)
321 OP(atomic_add)
322 OP(atomic_imin)
323 OP(atomic_umin)
324 OP(atomic_imax)
325 OP(atomic_umax)
326 OP(atomic_and)
327 OP(atomic_or)
328 OP(atomic_xor)
329 OP(atomic_fadd)
330 OP(atomic_fmin)
331 OP(atomic_fmax)
332 OP(atomic_fcomp_swap)
333 #undef OP
334 default:
335 unreachable("Invalid atomic");
336 }
337
338 nir_intrinsic_instr *atomic =
339 nir_intrinsic_instr_create(state->builder.shader, op);
340
341 nir_intrinsic_set_base(atomic, var->data.driver_location);
342
343 atomic->src[0] = nir_src_for_ssa(offset);
344 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
345 nir_intrinsic_infos[op].num_srcs);
346 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
347 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
348 }
349
350 return atomic;
351 }
352
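/*
 * Lower an interp_deref_at_* intrinsic to a barycentric setup intrinsic
 * followed by a load_interpolated_input. Flat inputs become a plain load
 * since interpolation has no effect on them.
 */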
353 static nir_intrinsic_instr *
354 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
355 nir_variable *var, nir_ssa_def *offset, unsigned component)
356 {
357 assert(var->data.mode == nir_var_shader_in);
358
359 /* Ignore interpolateAt() for flat variables - flat is flat. */
360 if (var->data.interpolation == INTERP_MODE_FLAT)
361 return lower_load(intrin, state, NULL, var, offset, component);
362
363 nir_intrinsic_op bary_op;
364 switch (intrin->intrinsic) {
365 case nir_intrinsic_interp_deref_at_centroid:
366 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
367 nir_intrinsic_load_barycentric_sample :
368 nir_intrinsic_load_barycentric_centroid;
369 break;
370 case nir_intrinsic_interp_deref_at_sample:
371 bary_op = nir_intrinsic_load_barycentric_at_sample;
372 break;
373 case nir_intrinsic_interp_deref_at_offset:
374 bary_op = nir_intrinsic_load_barycentric_at_offset;
375 break;
376 default:
377 unreachable("Bogus interpolateAt() intrinsic.");
378 }
379
380 nir_intrinsic_instr *bary_setup =
381 nir_intrinsic_instr_create(state->builder.shader, bary_op);
382
383 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
384 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
385
386 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
387 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
388 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
389
390 nir_builder_instr_insert(&state->builder, &bary_setup->instr);
391
392 nir_intrinsic_instr *load =
393 nir_intrinsic_instr_create(state->builder.shader,
394 nir_intrinsic_load_interpolated_input);
395 load->num_components = intrin->num_components;
396
397 nir_intrinsic_set_base(load, var->data.driver_location);
398 nir_intrinsic_set_component(load, component);
399
400 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
401 load->src[1] = nir_src_for_ssa(offset);
402
403 return load;
404 }
405
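/*
 * Lower every supported deref-based IO intrinsic in the block: compute the
 * offset (and the vertex index for per-vertex IO), build the replacement
 * intrinsic with one of the lower_* helpers, and swap it in for the original.
 */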
406 static bool
407 nir_lower_io_block(nir_block *block,
408 struct lower_io_state *state)
409 {
410 nir_builder *b = &state->builder;
411 const nir_shader_compiler_options *options = b->shader->options;
412 bool progress = false;
413
414 nir_foreach_instr_safe(instr, block) {
415 if (instr->type != nir_instr_type_intrinsic)
416 continue;
417
418 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
419
420 switch (intrin->intrinsic) {
421 case nir_intrinsic_load_deref:
422 case nir_intrinsic_store_deref:
423 case nir_intrinsic_deref_atomic_add:
424 case nir_intrinsic_deref_atomic_imin:
425 case nir_intrinsic_deref_atomic_umin:
426 case nir_intrinsic_deref_atomic_imax:
427 case nir_intrinsic_deref_atomic_umax:
428 case nir_intrinsic_deref_atomic_and:
429 case nir_intrinsic_deref_atomic_or:
430 case nir_intrinsic_deref_atomic_xor:
431 case nir_intrinsic_deref_atomic_exchange:
432 case nir_intrinsic_deref_atomic_comp_swap:
433 case nir_intrinsic_deref_atomic_fadd:
434 case nir_intrinsic_deref_atomic_fmin:
435 case nir_intrinsic_deref_atomic_fmax:
436 case nir_intrinsic_deref_atomic_fcomp_swap:
437 /* We can lower the io for this nir intrinsic */
438 break;
439 case nir_intrinsic_interp_deref_at_centroid:
440 case nir_intrinsic_interp_deref_at_sample:
441 case nir_intrinsic_interp_deref_at_offset:
442 /* We can optionally lower these to load_interpolated_input */
443 if (options->use_interpolated_input_intrinsics)
444 break;
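/* otherwise fall through and let the default case skip it */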
445 default:
446 /* We can't lower the io for this nir intrinsic, so skip it */
447 continue;
448 }
449
450 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
451
452 nir_variable *var = nir_deref_instr_get_variable(deref);
453 nir_variable_mode mode = var->data.mode;
454
455 if ((state->modes & mode) == 0)
456 continue;
457
458 if (mode != nir_var_shader_in &&
459 mode != nir_var_shader_out &&
460 mode != nir_var_mem_shared &&
461 mode != nir_var_uniform)
462 continue;
463
464 b->cursor = nir_before_instr(instr);
465
466 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
467
468 nir_ssa_def *offset;
469 nir_ssa_def *vertex_index = NULL;
470 unsigned component_offset = var->data.location_frac;
471
472 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
473 state->type_size, &component_offset);
474
475 nir_intrinsic_instr *replacement;
476
477 switch (intrin->intrinsic) {
478 case nir_intrinsic_load_deref:
479 replacement = lower_load(intrin, state, vertex_index, var, offset,
480 component_offset);
481 break;
482
483 case nir_intrinsic_store_deref:
484 replacement = lower_store(intrin, state, vertex_index, var, offset,
485 component_offset);
486 break;
487
488 case nir_intrinsic_deref_atomic_add:
489 case nir_intrinsic_deref_atomic_imin:
490 case nir_intrinsic_deref_atomic_umin:
491 case nir_intrinsic_deref_atomic_imax:
492 case nir_intrinsic_deref_atomic_umax:
493 case nir_intrinsic_deref_atomic_and:
494 case nir_intrinsic_deref_atomic_or:
495 case nir_intrinsic_deref_atomic_xor:
496 case nir_intrinsic_deref_atomic_exchange:
497 case nir_intrinsic_deref_atomic_comp_swap:
498 case nir_intrinsic_deref_atomic_fadd:
499 case nir_intrinsic_deref_atomic_fmin:
500 case nir_intrinsic_deref_atomic_fmax:
501 case nir_intrinsic_deref_atomic_fcomp_swap:
502 assert(vertex_index == NULL);
503 replacement = lower_atomic(intrin, state, var, offset);
504 break;
505
506 case nir_intrinsic_interp_deref_at_centroid:
507 case nir_intrinsic_interp_deref_at_sample:
508 case nir_intrinsic_interp_deref_at_offset:
509 assert(vertex_index == NULL);
510 replacement = lower_interpolate_at(intrin, state, var, offset,
511 component_offset);
512 break;
513
514 default:
515 continue;
516 }
517
518 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
519 if (intrin->dest.is_ssa) {
520 nir_ssa_dest_init(&replacement->instr, &replacement->dest,
521 intrin->dest.ssa.num_components,
522 intrin->dest.ssa.bit_size, NULL);
523 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
524 nir_src_for_ssa(&replacement->dest.ssa));
525 } else {
526 nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
527 }
528 }
529
530 nir_instr_insert_before(&intrin->instr, &replacement->instr);
531 nir_instr_remove(&intrin->instr);
532 progress = true;
533 }
534
535 return progress;
536 }
537
538 static bool
539 nir_lower_io_impl(nir_function_impl *impl,
540 nir_variable_mode modes,
541 int (*type_size)(const struct glsl_type *),
542 nir_lower_io_options options)
543 {
544 struct lower_io_state state;
545 bool progress = false;
546
547 nir_builder_init(&state.builder, impl);
548 state.dead_ctx = ralloc_context(NULL);
549 state.modes = modes;
550 state.type_size = type_size;
551 state.options = options;
552
553 nir_foreach_block(block, impl) {
554 progress |= nir_lower_io_block(block, &state);
555 }
556
557 ralloc_free(state.dead_ctx);
558
559 nir_metadata_preserve(impl, nir_metadata_block_index |
560 nir_metadata_dominance);
561 return progress;
562 }
563
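/**
 * Lower load/store/atomic derefs of the given variable modes to offset-based
 * IO intrinsics, using type_size() to measure types.
 *
 * A typical call looks something like the sketch below; the type_size
 * callback (here the hypothetical my_type_size) and the option flags are
 * driver-specific choices:
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 my_type_size, 0);
 */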
564 bool
565 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
566 int (*type_size)(const struct glsl_type *),
567 nir_lower_io_options options)
568 {
569 bool progress = false;
570
571 nir_foreach_function(function, shader) {
572 if (function->impl) {
573 progress |= nir_lower_io_impl(function->impl, modes,
574 type_size, options);
575 }
576 }
577
578 return progress;
579 }
580
581 static unsigned
582 type_scalar_size_bytes(const struct glsl_type *type)
583 {
584 assert(glsl_type_is_vector_or_scalar(type) ||
585 glsl_type_is_matrix(type));
586 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
587 }
588
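/*
 * Add a scalar offset to an address: plain 32/64-bit global addresses are
 * added to directly, 64bit_bounded_global adjusts the offset in component 3,
 * and 32bit_index_offset adjusts the offset in component 1.
 */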
589 static nir_ssa_def *
590 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
591 nir_address_format addr_format, nir_ssa_def *offset)
592 {
593 assert(offset->num_components == 1);
594 assert(addr->bit_size == offset->bit_size);
595
596 switch (addr_format) {
597 case nir_address_format_32bit_global:
598 case nir_address_format_64bit_global:
599 assert(addr->num_components == 1);
600 return nir_iadd(b, addr, offset);
601
602 case nir_address_format_64bit_bounded_global:
603 assert(addr->num_components == 4);
604 return nir_vec4(b, nir_channel(b, addr, 0),
605 nir_channel(b, addr, 1),
606 nir_channel(b, addr, 2),
607 nir_iadd(b, nir_channel(b, addr, 3), offset));
608
609 case nir_address_format_32bit_index_offset:
610 assert(addr->num_components == 2);
611 return nir_vec2(b, nir_channel(b, addr, 0),
612 nir_iadd(b, nir_channel(b, addr, 1), offset));
613 }
614 unreachable("Invalid address format");
615 }
616
617 static nir_ssa_def *
618 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
619 nir_address_format addr_format, int64_t offset)
620 {
621 return build_addr_iadd(b, addr, addr_format,
622 nir_imm_intN_t(b, offset, addr->bit_size));
623 }
624
625 static nir_ssa_def *
626 addr_to_index(nir_builder *b, nir_ssa_def *addr,
627 nir_address_format addr_format)
628 {
629 assert(addr_format == nir_address_format_32bit_index_offset);
630 assert(addr->num_components == 2);
631 return nir_channel(b, addr, 0);
632 }
633
634 static nir_ssa_def *
635 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
636 nir_address_format addr_format)
637 {
638 assert(addr_format == nir_address_format_32bit_index_offset);
639 assert(addr->num_components == 2);
640 return nir_channel(b, addr, 1);
641 }
642
643 /** Returns true if the given address format resolves to a global address */
644 static bool
645 addr_format_is_global(nir_address_format addr_format)
646 {
647 return addr_format == nir_address_format_32bit_global ||
648 addr_format == nir_address_format_64bit_global ||
649 addr_format == nir_address_format_64bit_bounded_global;
650 }
651
652 static nir_ssa_def *
653 addr_to_global(nir_builder *b, nir_ssa_def *addr,
654 nir_address_format addr_format)
655 {
656 switch (addr_format) {
657 case nir_address_format_32bit_global:
658 case nir_address_format_64bit_global:
659 assert(addr->num_components == 1);
660 return addr;
661
662 case nir_address_format_64bit_bounded_global:
663 assert(addr->num_components == 4);
664 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
665 nir_u2u64(b, nir_channel(b, addr, 3)));
666
667 case nir_address_format_32bit_index_offset:
668 unreachable("Cannot get a 64-bit address with this address format");
669 }
670
671 unreachable("Invalid address format");
672 }
673
674 static bool
675 addr_format_needs_bounds_check(nir_address_format addr_format)
676 {
677 return addr_format == nir_address_format_64bit_bounded_global;
678 }
679
680 static nir_ssa_def *
681 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
682 nir_address_format addr_format, unsigned size)
683 {
684 assert(addr_format == nir_address_format_64bit_bounded_global);
685 assert(addr->num_components == 4);
686 return nir_ige(b, nir_channel(b, addr, 2),
687 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
688 }
689
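/*
 * Emit the explicit load (load_ubo, load_ssbo, load_global or
 * load_kernel_input) for the given address. With a bounds-checked address
 * format the load is predicated on the address being in bounds and zero is
 * returned for out-of-bounds reads.
 */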
690 static nir_ssa_def *
691 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
692 nir_ssa_def *addr, nir_address_format addr_format,
693 unsigned num_components)
694 {
695 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
696
697 nir_intrinsic_op op;
698 switch (mode) {
699 case nir_var_mem_ubo:
700 op = nir_intrinsic_load_ubo;
701 break;
702 case nir_var_mem_ssbo:
703 if (addr_format_is_global(addr_format))
704 op = nir_intrinsic_load_global;
705 else
706 op = nir_intrinsic_load_ssbo;
707 break;
708 case nir_var_mem_global:
709 assert(addr_format_is_global(addr_format));
710 op = nir_intrinsic_load_global;
711 break;
712 case nir_var_shader_in:
713 assert(addr_format_is_global(addr_format));
714 op = nir_intrinsic_load_kernel_input;
715 break;
716 default:
717 unreachable("Unsupported explicit IO variable mode");
718 }
719
720 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
721
722 if (addr_format_is_global(addr_format)) {
723 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
724 } else {
725 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
726 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
727 }
728
729 if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
730 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
731
732 /* TODO: We should try to provide a better alignment. For OpenCL, we need
733 * to plumb the alignment through from SPIR-V when we have one.
734 */
735 nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);
736
737 assert(intrin->dest.is_ssa);
738 load->num_components = num_components;
739 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
740 intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
741
742 assert(load->dest.ssa.bit_size % 8 == 0);
743
744 if (addr_format_needs_bounds_check(addr_format)) {
745 /* The Vulkan spec for robustBufferAccess gives us quite a few options
746 * as to what we can do with an OOB read. Unfortunately, returning
747 * undefined values isn't one of them so we return an actual zero.
748 */
749 nir_const_value zero_val;
750 memset(&zero_val, 0, sizeof(zero_val));
751 nir_ssa_def *zero = nir_build_imm(b, load->num_components,
752 load->dest.ssa.bit_size, zero_val);
753
754 const unsigned load_size =
755 (load->dest.ssa.bit_size / 8) * load->num_components;
756 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
757
758 nir_builder_instr_insert(b, &load->instr);
759
760 nir_pop_if(b, NULL);
761
762 return nir_if_phi(b, &load->dest.ssa, zero);
763 } else {
764 nir_builder_instr_insert(b, &load->instr);
765 return &load->dest.ssa;
766 }
767 }
768
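/*
 * Emit the explicit store (store_ssbo or store_global) for the given address.
 * With a bounds-checked address format the store is only performed when the
 * address is in bounds.
 */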
769 static void
770 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
771 nir_ssa_def *addr, nir_address_format addr_format,
772 nir_ssa_def *value, nir_component_mask_t write_mask)
773 {
774 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
775
776 nir_intrinsic_op op;
777 switch (mode) {
778 case nir_var_mem_ssbo:
779 if (addr_format_is_global(addr_format))
780 op = nir_intrinsic_store_global;
781 else
782 op = nir_intrinsic_store_ssbo;
783 break;
784 case nir_var_mem_global:
785 assert(addr_format_is_global(addr_format));
786 op = nir_intrinsic_store_global;
787 break;
788 default:
789 unreachable("Unsupported explicit IO variable mode");
790 }
791
792 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
793
794 store->src[0] = nir_src_for_ssa(value);
795 if (addr_format_is_global(addr_format)) {
796 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
797 } else {
798 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
799 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
800 }
801
802 nir_intrinsic_set_write_mask(store, write_mask);
803
804 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
805
806 /* TODO: We should try to provide a better alignment. For OpenCL, we need
807 * to plumb the alignment through from SPIR-V when we have one.
808 */
809 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
810
811 assert(value->num_components == 1 ||
812 value->num_components == intrin->num_components);
813 store->num_components = value->num_components;
814
815 assert(value->bit_size % 8 == 0);
816
817 if (addr_format_needs_bounds_check(addr_format)) {
818 const unsigned store_size = (value->bit_size / 8) * store->num_components;
819 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
820
821 nir_builder_instr_insert(b, &store->instr);
822
823 nir_pop_if(b, NULL);
824 } else {
825 nir_builder_instr_insert(b, &store->instr);
826 }
827 }
828
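/*
 * Emit the SSBO or global atomic corresponding to a deref atomic at the given
 * address. With a bounds-checked address format the atomic is predicated and
 * an undef value is returned when the address is out of bounds.
 */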
829 static nir_ssa_def *
830 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
831 nir_ssa_def *addr, nir_address_format addr_format)
832 {
833 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
834 const unsigned num_data_srcs =
835 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
836
837 nir_intrinsic_op op;
838 switch (mode) {
839 case nir_var_mem_ssbo:
840 if (addr_format_is_global(addr_format))
841 op = global_atomic_for_deref(intrin->intrinsic);
842 else
843 op = ssbo_atomic_for_deref(intrin->intrinsic);
844 break;
845 case nir_var_mem_global:
846 assert(addr_format_is_global(addr_format));
847 op = global_atomic_for_deref(intrin->intrinsic);
848 break;
849 default:
850 unreachable("Unsupported explicit IO variable mode");
851 }
852
853 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
854
855 unsigned src = 0;
856 if (addr_format_is_global(addr_format)) {
857 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
858 } else {
859 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
860 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
861 }
862 for (unsigned i = 0; i < num_data_srcs; i++) {
863 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
864 }
865
866 /* Global atomics don't have access flags because they assume that the
867 * address may be non-uniform.
868 */
869 if (!addr_format_is_global(addr_format))
870 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
871
872 assert(intrin->dest.ssa.num_components == 1);
873 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
874 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
875
876 assert(atomic->dest.ssa.bit_size % 8 == 0);
877
878 if (addr_format_needs_bounds_check(addr_format)) {
879 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
880 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
881
882 nir_builder_instr_insert(b, &atomic->instr);
883
884 nir_pop_if(b, NULL);
885 return nir_if_phi(b, &atomic->dest.ssa,
886 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
887 } else {
888 nir_builder_instr_insert(b, &atomic->instr);
889 return &atomic->dest.ssa;
890 }
891 }
892
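/*
 * Replace a deref with the address it denotes in the chosen address format:
 * variables yield their driver_location, array and struct derefs add the
 * appropriate stride or field offset to the parent address, and casts simply
 * forward the parent address.
 */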
893 static void
894 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
895 nir_address_format addr_format)
896 {
897 /* Just delete the deref if it's not used. We can't use
898 * nir_deref_instr_remove_if_unused here because it may remove more than
899 * one deref which could break our list walking since we walk the list
900 * backwards.
901 */
902 assert(list_empty(&deref->dest.ssa.if_uses));
903 if (list_empty(&deref->dest.ssa.uses)) {
904 nir_instr_remove(&deref->instr);
905 return;
906 }
907
908 b->cursor = nir_after_instr(&deref->instr);
909
910 nir_ssa_def *parent_addr = NULL;
911 if (deref->deref_type != nir_deref_type_var) {
912 assert(deref->parent.is_ssa);
913 parent_addr = deref->parent.ssa;
914 }
915
916
917 nir_ssa_def *addr = NULL;
918 assert(deref->dest.is_ssa);
919 switch (deref->deref_type) {
920 case nir_deref_type_var:
921 assert(deref->mode == nir_var_shader_in);
922 addr = nir_imm_intN_t(b, deref->var->data.driver_location,
923 deref->dest.ssa.bit_size);
924 break;
925
926 case nir_deref_type_array: {
927 nir_deref_instr *parent = nir_deref_instr_parent(deref);
928
929 unsigned stride = glsl_get_explicit_stride(parent->type);
930 if ((glsl_type_is_matrix(parent->type) &&
931 glsl_matrix_type_is_row_major(parent->type)) ||
932 (glsl_type_is_vector(parent->type) && stride == 0))
933 stride = type_scalar_size_bytes(parent->type);
934
935 assert(stride > 0);
936
937 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
938 index = nir_i2i(b, index, parent_addr->bit_size);
939 addr = build_addr_iadd(b, parent_addr, addr_format,
940 nir_imul_imm(b, index, stride));
941 break;
942 }
943
944 case nir_deref_type_ptr_as_array: {
945 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
946 index = nir_i2i(b, index, parent_addr->bit_size);
947 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
948 addr = build_addr_iadd(b, parent_addr, addr_format,
949 nir_imul_imm(b, index, stride));
950 break;
951 }
952
953 case nir_deref_type_array_wildcard:
954 unreachable("Wildcards should be lowered by now");
955 break;
956
957 case nir_deref_type_struct: {
958 nir_deref_instr *parent = nir_deref_instr_parent(deref);
959 int offset = glsl_get_struct_field_offset(parent->type,
960 deref->strct.index);
961 assert(offset >= 0);
962 addr = build_addr_iadd_imm(b, parent_addr, addr_format, offset);
963 break;
964 }
965
966 case nir_deref_type_cast:
967 /* Nothing to do here */
968 addr = parent_addr;
969 break;
970 }
971
972 nir_instr_remove(&deref->instr);
973 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
974 }
975
976 static void
977 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
978 nir_address_format addr_format)
979 {
980 b->cursor = nir_after_instr(&intrin->instr);
981
982 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
983 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
984 unsigned scalar_size = type_scalar_size_bytes(deref->type);
985 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
986 assert(vec_stride == 0 || vec_stride >= scalar_size);
987
988 nir_ssa_def *addr = &deref->dest.ssa;
989 if (intrin->intrinsic == nir_intrinsic_load_deref) {
990 nir_ssa_def *value;
991 if (vec_stride > scalar_size) {
992 nir_ssa_def *comps[4] = { NULL, };
993 for (unsigned i = 0; i < intrin->num_components; i++) {
994 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
995 vec_stride * i);
996 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
997 addr_format, 1);
998 }
999 value = nir_vec(b, comps, intrin->num_components);
1000 } else {
1001 value = build_explicit_io_load(b, intrin, addr, addr_format,
1002 intrin->num_components);
1003 }
1004 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1005 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
1006 assert(intrin->src[1].is_ssa);
1007 nir_ssa_def *value = intrin->src[1].ssa;
1008 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1009 if (vec_stride > scalar_size) {
1010 for (unsigned i = 0; i < intrin->num_components; i++) {
1011 if (!(write_mask & (1 << i)))
1012 continue;
1013
1014 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1015 vec_stride * i);
1016 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1017 nir_channel(b, value, i), 1);
1018 }
1019 } else {
1020 build_explicit_io_store(b, intrin, addr, addr_format,
1021 value, write_mask);
1022 }
1023 } else {
1024 nir_ssa_def *value =
1025 build_explicit_io_atomic(b, intrin, addr, addr_format);
1026 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1027 }
1028
1029 nir_instr_remove(&intrin->instr);
1030 }
1031
1032 static void
1033 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1034 nir_address_format addr_format)
1035 {
1036 b->cursor = nir_after_instr(&intrin->instr);
1037
1038 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1039
1040 assert(glsl_type_is_array(deref->type));
1041 assert(glsl_get_length(deref->type) == 0);
1042 unsigned stride = glsl_get_explicit_stride(deref->type);
1043 assert(stride > 0);
1044
1045 assert(addr_format == nir_address_format_32bit_index_offset);
1046 nir_ssa_def *addr = &deref->dest.ssa;
1047 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1048 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1049
1050 nir_intrinsic_instr *bsize =
1051 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
1052 bsize->src[0] = nir_src_for_ssa(index);
1053 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1054 nir_builder_instr_insert(b, &bsize->instr);
1055
1056 nir_ssa_def *arr_size =
1057 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
1058 nir_imm_int(b, stride));
1059
1060 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
1061 nir_instr_remove(&intrin->instr);
1062 }
1063
1064 static bool
1065 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
1066 nir_address_format addr_format)
1067 {
1068 bool progress = false;
1069
1070 nir_builder b;
1071 nir_builder_init(&b, impl);
1072
1073 /* Walk in reverse order so that we can see the full deref chain when we
1074 * lower the access operations. We lower them assuming that the derefs
1075 * will be turned into address calculations later.
1076 */
1077 nir_foreach_block_reverse(block, impl) {
1078 nir_foreach_instr_reverse_safe(instr, block) {
1079 switch (instr->type) {
1080 case nir_instr_type_deref: {
1081 nir_deref_instr *deref = nir_instr_as_deref(instr);
1082 if (deref->mode & modes) {
1083 lower_explicit_io_deref(&b, deref, addr_format);
1084 progress = true;
1085 }
1086 break;
1087 }
1088
1089 case nir_instr_type_intrinsic: {
1090 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1091 switch (intrin->intrinsic) {
1092 case nir_intrinsic_load_deref:
1093 case nir_intrinsic_store_deref:
1094 case nir_intrinsic_deref_atomic_add:
1095 case nir_intrinsic_deref_atomic_imin:
1096 case nir_intrinsic_deref_atomic_umin:
1097 case nir_intrinsic_deref_atomic_imax:
1098 case nir_intrinsic_deref_atomic_umax:
1099 case nir_intrinsic_deref_atomic_and:
1100 case nir_intrinsic_deref_atomic_or:
1101 case nir_intrinsic_deref_atomic_xor:
1102 case nir_intrinsic_deref_atomic_exchange:
1103 case nir_intrinsic_deref_atomic_comp_swap:
1104 case nir_intrinsic_deref_atomic_fadd:
1105 case nir_intrinsic_deref_atomic_fmin:
1106 case nir_intrinsic_deref_atomic_fmax:
1107 case nir_intrinsic_deref_atomic_fcomp_swap: {
1108 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1109 if (deref->mode & modes) {
1110 lower_explicit_io_access(&b, intrin, addr_format);
1111 progress = true;
1112 }
1113 break;
1114 }
1115
1116 case nir_intrinsic_deref_buffer_array_length: {
1117 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1118 if (deref->mode & modes) {
1119 lower_explicit_io_array_length(&b, intrin, addr_format);
1120 progress = true;
1121 }
1122 break;
1123 }
1124
1125 default:
1126 break;
1127 }
1128 break;
1129 }
1130
1131 default:
1132 /* Nothing to do */
1133 break;
1134 }
1135 }
1136 }
1137
1138 if (progress) {
1139 nir_metadata_preserve(impl, nir_metadata_block_index |
1140 nir_metadata_dominance);
1141 }
1142
1143 return progress;
1144 }
1145
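/**
 * Lower derefs and the IO intrinsics that consume them, for the given modes,
 * into explicit address arithmetic plus addressed load/store/atomic
 * intrinsics in the requested address format. For example:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ssbo,
 *                          nir_address_format_32bit_index_offset);
 */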
1146 bool
1147 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1148 nir_address_format addr_format)
1149 {
1150 bool progress = false;
1151
1152 nir_foreach_function(function, shader) {
1153 if (function->impl &&
1154 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1155 progress = true;
1156 }
1157
1158 return progress;
1159 }
1160
1161 /**
1162 * Return the offset source for a load/store intrinsic.
1163 */
1164 nir_src *
1165 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1166 {
1167 switch (instr->intrinsic) {
1168 case nir_intrinsic_load_input:
1169 case nir_intrinsic_load_output:
1170 case nir_intrinsic_load_shared:
1171 case nir_intrinsic_load_uniform:
1172 case nir_intrinsic_load_global:
1173 return &instr->src[0];
1174 case nir_intrinsic_load_ubo:
1175 case nir_intrinsic_load_ssbo:
1176 case nir_intrinsic_load_per_vertex_input:
1177 case nir_intrinsic_load_per_vertex_output:
1178 case nir_intrinsic_load_interpolated_input:
1179 case nir_intrinsic_store_output:
1180 case nir_intrinsic_store_shared:
1181 case nir_intrinsic_store_global:
1182 return &instr->src[1];
1183 case nir_intrinsic_store_ssbo:
1184 case nir_intrinsic_store_per_vertex_output:
1185 return &instr->src[2];
1186 default:
1187 return NULL;
1188 }
1189 }
1190
1191 /**
1192 * Return the vertex index source for a load/store per_vertex intrinsic.
1193 */
1194 nir_src *
1195 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1196 {
1197 switch (instr->intrinsic) {
1198 case nir_intrinsic_load_per_vertex_input:
1199 case nir_intrinsic_load_per_vertex_output:
1200 return &instr->src[0];
1201 case nir_intrinsic_store_per_vertex_output:
1202 return &instr->src[1];
1203 default:
1204 return NULL;
1205 }
1206 }