nir/lower_io: Add a bounds-checked 64-bit global address format
[mesa.git] / src / compiler / nir / nir_lower_io.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts loads and stores of input/output variables
31 * into the corresponding input/output intrinsics.
32 */
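/*
 * For example (illustrative only; the exact intrinsic and its sources
 * depend on the variable mode and shader stage), a load_deref of a
 * fragment-shader input variable becomes a load_input or
 * load_interpolated_input intrinsic whose base/component indices come from
 * the variable's driver_location and location_frac and whose offset source
 * is built by get_io_offset() below.
 */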
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 struct lower_io_state {
39 void *dead_ctx;
40 nir_builder builder;
41 int (*type_size)(const struct glsl_type *type);
42 nir_variable_mode modes;
43 nir_lower_io_options options;
44 };
45
46 static nir_intrinsic_op
47 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
48 {
49 switch (deref_op) {
50 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
51 OP(atomic_exchange)
52 OP(atomic_comp_swap)
53 OP(atomic_add)
54 OP(atomic_imin)
55 OP(atomic_umin)
56 OP(atomic_imax)
57 OP(atomic_umax)
58 OP(atomic_and)
59 OP(atomic_or)
60 OP(atomic_xor)
61 OP(atomic_fadd)
62 OP(atomic_fmin)
63 OP(atomic_fmax)
64 OP(atomic_fcomp_swap)
65 #undef OP
66 default:
67 unreachable("Invalid SSBO atomic");
68 }
69 }
70
71 static nir_intrinsic_op
72 global_atomic_for_deref(nir_intrinsic_op deref_op)
73 {
74 switch (deref_op) {
75 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
76 OP(atomic_exchange)
77 OP(atomic_comp_swap)
78 OP(atomic_add)
79 OP(atomic_imin)
80 OP(atomic_umin)
81 OP(atomic_imax)
82 OP(atomic_umax)
83 OP(atomic_and)
84 OP(atomic_or)
85 OP(atomic_xor)
86 OP(atomic_fadd)
87 OP(atomic_fmin)
88 OP(atomic_fmax)
89 OP(atomic_fcomp_swap)
90 #undef OP
91 default:
92 unreachable("Invalid global atomic");
93 }
94 }
95
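/*
 * Assign consecutive driver_location values to every variable in var_list
 * using the driver-provided type_size callback; UBO and SSBO variables are
 * skipped since they live in their own address spaces.  *size receives the
 * total amount of space used.
 */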
96 void
97 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
98 int (*type_size)(const struct glsl_type *))
99 {
100 unsigned location = 0;
101
102 nir_foreach_variable(var, var_list) {
103 /*
104 * UBOs and SSBOs have their own address spaces, so don't count them
105 * towards the number of global uniforms.
106 */
107 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
108 continue;
109
110 var->data.driver_location = location;
111 location += type_size(var->type);
112 }
113
114 *size = location;
115 }
116
117 /**
118 * Return true if the given variable is a per-vertex input/output array
119 * (such as geometry shader inputs).
120 */
121 bool
122 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
123 {
124 if (var->data.patch || !glsl_type_is_array(var->type))
125 return false;
126
127 if (var->data.mode == nir_var_shader_in)
128 return stage == MESA_SHADER_GEOMETRY ||
129 stage == MESA_SHADER_TESS_CTRL ||
130 stage == MESA_SHADER_TESS_EVAL;
131
132 if (var->data.mode == nir_var_shader_out)
133 return stage == MESA_SHADER_TESS_CTRL;
134
135 return false;
136 }
137
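/*
 * Flatten a variable deref chain into a single offset expression using the
 * driver-provided type_size callback.  For per-vertex I/O the outermost
 * array index is returned separately through vertex_index, and for
 * "compact" variables the constant array index is split into a vec4 slot
 * offset and an updated *component.
 */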
138 static nir_ssa_def *
139 get_io_offset(nir_builder *b, nir_deref_instr *deref,
140 nir_ssa_def **vertex_index,
141 int (*type_size)(const struct glsl_type *),
142 unsigned *component)
143 {
144 nir_deref_path path;
145 nir_deref_path_init(&path, deref, NULL);
146
147 assert(path.path[0]->deref_type == nir_deref_type_var);
148 nir_deref_instr **p = &path.path[1];
149
150 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
151 * outermost array index separate. Process the rest normally.
152 */
153 if (vertex_index != NULL) {
154 assert((*p)->deref_type == nir_deref_type_array);
155 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
156 p++;
157 }
158
159 if (path.path[0]->var->data.compact) {
160 assert((*p)->deref_type == nir_deref_type_array);
161 assert(glsl_type_is_scalar((*p)->type));
162
163 /* We always lower indirect dereferences for "compact" array vars. */
164 const unsigned index = nir_src_as_uint((*p)->arr.index);
165 const unsigned total_offset = *component + index;
166 const unsigned slot_offset = total_offset / 4;
167 *component = total_offset % 4;
168 return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
169 }
170
171 /* Just emit code and let constant-folding go to town */
172 nir_ssa_def *offset = nir_imm_int(b, 0);
173
174 for (; *p; p++) {
175 if ((*p)->deref_type == nir_deref_type_array) {
176 unsigned size = type_size((*p)->type);
177
178 nir_ssa_def *mul =
179 nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
180
181 offset = nir_iadd(b, offset, mul);
182 } else if ((*p)->deref_type == nir_deref_type_struct) {
183 /* p starts at path[1], so this is safe */
184 nir_deref_instr *parent = *(p - 1);
185
186 unsigned field_offset = 0;
187 for (unsigned i = 0; i < (*p)->strct.index; i++) {
188 field_offset += type_size(glsl_get_struct_field(parent->type, i));
189 }
190 offset = nir_iadd_imm(b, offset, field_offset);
191 } else {
192 unreachable("Unsupported deref type");
193 }
194 }
195
196 nir_deref_path_finish(&path);
197
198 return offset;
199 }
200
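/*
 * Build the intrinsic that replaces a load_deref of an I/O variable:
 * load_input/load_per_vertex_input for shader inputs (or
 * load_interpolated_input together with a freshly emitted
 * load_barycentric_* intrinsic for non-flat fragment inputs),
 * load_output/load_per_vertex_output for outputs, load_uniform for
 * uniforms, and load_shared for shared memory.
 */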
201 static nir_intrinsic_instr *
202 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
203 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
204 unsigned component)
205 {
206 const nir_shader *nir = state->builder.shader;
207 nir_variable_mode mode = var->data.mode;
208 nir_ssa_def *barycentric = NULL;
209
210 nir_intrinsic_op op;
211 switch (mode) {
212 case nir_var_shader_in:
213 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
214 nir->options->use_interpolated_input_intrinsics &&
215 var->data.interpolation != INTERP_MODE_FLAT) {
216 assert(vertex_index == NULL);
217
218 nir_intrinsic_op bary_op;
219 if (var->data.sample ||
220 (state->options & nir_lower_io_force_sample_interpolation))
221 bary_op = nir_intrinsic_load_barycentric_sample;
222 else if (var->data.centroid)
223 bary_op = nir_intrinsic_load_barycentric_centroid;
224 else
225 bary_op = nir_intrinsic_load_barycentric_pixel;
226
227 barycentric = nir_load_barycentric(&state->builder, bary_op,
228 var->data.interpolation);
229 op = nir_intrinsic_load_interpolated_input;
230 } else {
231 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
232 nir_intrinsic_load_input;
233 }
234 break;
235 case nir_var_shader_out:
236 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
237 nir_intrinsic_load_output;
238 break;
239 case nir_var_uniform:
240 op = nir_intrinsic_load_uniform;
241 break;
242 case nir_var_mem_shared:
243 op = nir_intrinsic_load_shared;
244 break;
245 default:
246 unreachable("Unknown variable mode");
247 }
248
249 nir_intrinsic_instr *load =
250 nir_intrinsic_instr_create(state->builder.shader, op);
251 load->num_components = intrin->num_components;
252
253 nir_intrinsic_set_base(load, var->data.driver_location);
254 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
255 nir_intrinsic_set_component(load, component);
256
257 if (load->intrinsic == nir_intrinsic_load_uniform)
258 nir_intrinsic_set_range(load, state->type_size(var->type));
259
260 if (vertex_index) {
261 load->src[0] = nir_src_for_ssa(vertex_index);
262 load->src[1] = nir_src_for_ssa(offset);
263 } else if (barycentric) {
264 load->src[0] = nir_src_for_ssa(barycentric);
265 load->src[1] = nir_src_for_ssa(offset);
266 } else {
267 load->src[0] = nir_src_for_ssa(offset);
268 }
269
270 return load;
271 }
272
273 static nir_intrinsic_instr *
274 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
275 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
276 unsigned component)
277 {
278 nir_variable_mode mode = var->data.mode;
279
280 nir_intrinsic_op op;
281 if (mode == nir_var_mem_shared) {
282 op = nir_intrinsic_store_shared;
283 } else {
284 assert(mode == nir_var_shader_out);
285 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
286 nir_intrinsic_store_output;
287 }
288
289 nir_intrinsic_instr *store =
290 nir_intrinsic_instr_create(state->builder.shader, op);
291 store->num_components = intrin->num_components;
292
293 nir_src_copy(&store->src[0], &intrin->src[1], store);
294
295 nir_intrinsic_set_base(store, var->data.driver_location);
296
297 if (mode == nir_var_shader_out)
298 nir_intrinsic_set_component(store, component);
299
300 nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
301
302 if (vertex_index)
303 store->src[1] = nir_src_for_ssa(vertex_index);
304
305 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
306
307 return store;
308 }
309
310 static nir_intrinsic_instr *
311 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
312 nir_variable *var, nir_ssa_def *offset)
313 {
314 assert(var->data.mode == nir_var_mem_shared);
315
316 nir_intrinsic_op op;
317 switch (intrin->intrinsic) {
318 #define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
319 OP(atomic_exchange)
320 OP(atomic_comp_swap)
321 OP(atomic_add)
322 OP(atomic_imin)
323 OP(atomic_umin)
324 OP(atomic_imax)
325 OP(atomic_umax)
326 OP(atomic_and)
327 OP(atomic_or)
328 OP(atomic_xor)
329 OP(atomic_fadd)
330 OP(atomic_fmin)
331 OP(atomic_fmax)
332 OP(atomic_fcomp_swap)
333 #undef OP
334 default:
335 unreachable("Invalid atomic");
336 }
337
338 nir_intrinsic_instr *atomic =
339 nir_intrinsic_instr_create(state->builder.shader, op);
340
341 nir_intrinsic_set_base(atomic, var->data.driver_location);
342
343 atomic->src[0] = nir_src_for_ssa(offset);
344 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
345 nir_intrinsic_infos[op].num_srcs);
346 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
347 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
348 }
349
350 return atomic;
351 }
352
353 static nir_intrinsic_instr *
354 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
355 nir_variable *var, nir_ssa_def *offset, unsigned component)
356 {
357 assert(var->data.mode == nir_var_shader_in);
358
359 /* Ignore interpolateAt() for flat variables - flat is flat. */
360 if (var->data.interpolation == INTERP_MODE_FLAT)
361 return lower_load(intrin, state, NULL, var, offset, component);
362
363 nir_intrinsic_op bary_op;
364 switch (intrin->intrinsic) {
365 case nir_intrinsic_interp_deref_at_centroid:
366 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
367 nir_intrinsic_load_barycentric_sample :
368 nir_intrinsic_load_barycentric_centroid;
369 break;
370 case nir_intrinsic_interp_deref_at_sample:
371 bary_op = nir_intrinsic_load_barycentric_at_sample;
372 break;
373 case nir_intrinsic_interp_deref_at_offset:
374 bary_op = nir_intrinsic_load_barycentric_at_offset;
375 break;
376 default:
377 unreachable("Bogus interpolateAt() intrinsic.");
378 }
379
380 nir_intrinsic_instr *bary_setup =
381 nir_intrinsic_instr_create(state->builder.shader, bary_op);
382
383 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
384 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
385
386 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
387 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
388 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
389
390 nir_builder_instr_insert(&state->builder, &bary_setup->instr);
391
392 nir_intrinsic_instr *load =
393 nir_intrinsic_instr_create(state->builder.shader,
394 nir_intrinsic_load_interpolated_input);
395 load->num_components = intrin->num_components;
396
397 nir_intrinsic_set_base(load, var->data.driver_location);
398 nir_intrinsic_set_component(load, component);
399
400 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
401 load->src[1] = nir_src_for_ssa(offset);
402
403 return load;
404 }
405
406 static bool
407 nir_lower_io_block(nir_block *block,
408 struct lower_io_state *state)
409 {
410 nir_builder *b = &state->builder;
411 const nir_shader_compiler_options *options = b->shader->options;
412 bool progress = false;
413
414 nir_foreach_instr_safe(instr, block) {
415 if (instr->type != nir_instr_type_intrinsic)
416 continue;
417
418 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
419
420 switch (intrin->intrinsic) {
421 case nir_intrinsic_load_deref:
422 case nir_intrinsic_store_deref:
423 case nir_intrinsic_deref_atomic_add:
424 case nir_intrinsic_deref_atomic_imin:
425 case nir_intrinsic_deref_atomic_umin:
426 case nir_intrinsic_deref_atomic_imax:
427 case nir_intrinsic_deref_atomic_umax:
428 case nir_intrinsic_deref_atomic_and:
429 case nir_intrinsic_deref_atomic_or:
430 case nir_intrinsic_deref_atomic_xor:
431 case nir_intrinsic_deref_atomic_exchange:
432 case nir_intrinsic_deref_atomic_comp_swap:
433 case nir_intrinsic_deref_atomic_fadd:
434 case nir_intrinsic_deref_atomic_fmin:
435 case nir_intrinsic_deref_atomic_fmax:
436 case nir_intrinsic_deref_atomic_fcomp_swap:
437 /* We can lower the I/O for this NIR intrinsic */
438 break;
439 case nir_intrinsic_interp_deref_at_centroid:
440 case nir_intrinsic_interp_deref_at_sample:
441 case nir_intrinsic_interp_deref_at_offset:
442 /* We can optionally lower these to load_interpolated_input */
443 if (options->use_interpolated_input_intrinsics)
444 break;
445 default:
446 /* We can't lower the I/O for this NIR intrinsic, so skip it */
447 continue;
448 }
449
450 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
451
452 nir_variable *var = nir_deref_instr_get_variable(deref);
453 nir_variable_mode mode = var->data.mode;
454
455 if ((state->modes & mode) == 0)
456 continue;
457
458 if (mode != nir_var_shader_in &&
459 mode != nir_var_shader_out &&
460 mode != nir_var_mem_shared &&
461 mode != nir_var_uniform)
462 continue;
463
464 b->cursor = nir_before_instr(instr);
465
466 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
467
468 nir_ssa_def *offset;
469 nir_ssa_def *vertex_index = NULL;
470 unsigned component_offset = var->data.location_frac;
471
472 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
473 state->type_size, &component_offset);
474
475 nir_intrinsic_instr *replacement;
476
477 switch (intrin->intrinsic) {
478 case nir_intrinsic_load_deref:
479 replacement = lower_load(intrin, state, vertex_index, var, offset,
480 component_offset);
481 break;
482
483 case nir_intrinsic_store_deref:
484 replacement = lower_store(intrin, state, vertex_index, var, offset,
485 component_offset);
486 break;
487
488 case nir_intrinsic_deref_atomic_add:
489 case nir_intrinsic_deref_atomic_imin:
490 case nir_intrinsic_deref_atomic_umin:
491 case nir_intrinsic_deref_atomic_imax:
492 case nir_intrinsic_deref_atomic_umax:
493 case nir_intrinsic_deref_atomic_and:
494 case nir_intrinsic_deref_atomic_or:
495 case nir_intrinsic_deref_atomic_xor:
496 case nir_intrinsic_deref_atomic_exchange:
497 case nir_intrinsic_deref_atomic_comp_swap:
498 case nir_intrinsic_deref_atomic_fadd:
499 case nir_intrinsic_deref_atomic_fmin:
500 case nir_intrinsic_deref_atomic_fmax:
501 case nir_intrinsic_deref_atomic_fcomp_swap:
502 assert(vertex_index == NULL);
503 replacement = lower_atomic(intrin, state, var, offset);
504 break;
505
506 case nir_intrinsic_interp_deref_at_centroid:
507 case nir_intrinsic_interp_deref_at_sample:
508 case nir_intrinsic_interp_deref_at_offset:
509 assert(vertex_index == NULL);
510 replacement = lower_interpolate_at(intrin, state, var, offset,
511 component_offset);
512 break;
513
514 default:
515 continue;
516 }
517
518 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
519 if (intrin->dest.is_ssa) {
520 nir_ssa_dest_init(&replacement->instr, &replacement->dest,
521 intrin->dest.ssa.num_components,
522 intrin->dest.ssa.bit_size, NULL);
523 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
524 nir_src_for_ssa(&replacement->dest.ssa));
525 } else {
526 nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
527 }
528 }
529
530 nir_instr_insert_before(&intrin->instr, &replacement->instr);
531 nir_instr_remove(&intrin->instr);
532 progress = true;
533 }
534
535 return progress;
536 }
537
538 static bool
539 nir_lower_io_impl(nir_function_impl *impl,
540 nir_variable_mode modes,
541 int (*type_size)(const struct glsl_type *),
542 nir_lower_io_options options)
543 {
544 struct lower_io_state state;
545 bool progress = false;
546
547 nir_builder_init(&state.builder, impl);
548 state.dead_ctx = ralloc_context(NULL);
549 state.modes = modes;
550 state.type_size = type_size;
551 state.options = options;
552
553 nir_foreach_block(block, impl) {
554 progress |= nir_lower_io_block(block, &state);
555 }
556
557 ralloc_free(state.dead_ctx);
558
559 nir_metadata_preserve(impl, nir_metadata_block_index |
560 nir_metadata_dominance);
561 return progress;
562 }
563
564 bool
565 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
566 int (*type_size)(const struct glsl_type *),
567 nir_lower_io_options options)
568 {
569 bool progress = false;
570
571 nir_foreach_function(function, shader) {
572 if (function->impl) {
573 progress |= nir_lower_io_impl(function->impl, modes,
574 type_size, options);
575 }
576 }
577
578 return progress;
579 }
580
581 static unsigned
582 type_scalar_size_bytes(const struct glsl_type *type)
583 {
584 assert(glsl_type_is_vector_or_scalar(type) ||
585 glsl_type_is_matrix(type));
586 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
587 }
588
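/*
 * Address layouts assumed by the helpers below (inferred from how the
 * formats are consumed in this file):
 *
 *  - 32bit_global / 64bit_global: a single scalar holding the address.
 *  - 64bit_bounded_global: vec4(addr_lo, addr_hi, size, offset) of 32-bit
 *    values; the 64-bit address is pack_64_2x32(addr_lo, addr_hi) + offset
 *    and an access of N bytes is in bounds iff offset + N <= size.
 *  - 32bit_index_offset: vec2(buffer index, byte offset).
 */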
589 static nir_ssa_def *
590 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
591 nir_address_format addr_format, nir_ssa_def *offset)
592 {
593 assert(offset->num_components == 1);
594 assert(addr->bit_size == offset->bit_size);
595
596 switch (addr_format) {
597 case nir_address_format_32bit_global:
598 case nir_address_format_64bit_global:
599 assert(addr->num_components == 1);
600 return nir_iadd(b, addr, offset);
601
602 case nir_address_format_64bit_bounded_global:
603 assert(addr->num_components == 4);
604 return nir_vec4(b, nir_channel(b, addr, 0),
605 nir_channel(b, addr, 1),
606 nir_channel(b, addr, 2),
607 nir_iadd(b, nir_channel(b, addr, 3), offset));
608
609 case nir_address_format_32bit_index_offset:
610 assert(addr->num_components == 2);
611 return nir_vec2(b, nir_channel(b, addr, 0),
612 nir_iadd(b, nir_channel(b, addr, 1), offset));
613 }
614 unreachable("Invalid address format");
615 }
616
617 static nir_ssa_def *
618 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
619 nir_address_format addr_format, int64_t offset)
620 {
621 return build_addr_iadd(b, addr, addr_format,
622 nir_imm_intN_t(b, offset, addr->bit_size));
623 }
624
625 static nir_ssa_def *
626 addr_to_index(nir_builder *b, nir_ssa_def *addr,
627 nir_address_format addr_format)
628 {
629 assert(addr_format == nir_address_format_32bit_index_offset);
630 assert(addr->num_components == 2);
631 return nir_channel(b, addr, 0);
632 }
633
634 static nir_ssa_def *
635 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
636 nir_address_format addr_format)
637 {
638 assert(addr_format == nir_address_format_32bit_index_offset);
639 assert(addr->num_components == 2);
640 return nir_channel(b, addr, 1);
641 }
642
643 /** Returns true if the given address format resolves to a global address */
644 static bool
645 addr_format_is_global(nir_address_format addr_format)
646 {
647 return addr_format == nir_address_format_32bit_global ||
648 addr_format == nir_address_format_64bit_global ||
649 addr_format == nir_address_format_64bit_bounded_global;
650 }
651
652 static nir_ssa_def *
653 addr_to_global(nir_builder *b, nir_ssa_def *addr,
654 nir_address_format addr_format)
655 {
656 switch (addr_format) {
657 case nir_address_format_32bit_global:
658 case nir_address_format_64bit_global:
659 assert(addr->num_components == 1);
660 return addr;
661
662 case nir_address_format_64bit_bounded_global:
663 assert(addr->num_components == 4);
664 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
665 nir_u2u64(b, nir_channel(b, addr, 3)));
666
667 case nir_address_format_32bit_index_offset:
668 unreachable("Cannot get a 64-bit address with this address format");
669 }
670
671 unreachable("Invalid address format");
672 }
673
674 static bool
675 addr_format_needs_bounds_check(nir_address_format addr_format)
676 {
677 return addr_format == nir_address_format_64bit_bounded_global;
678 }
679
680 static nir_ssa_def *
681 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
682 nir_address_format addr_format, unsigned size)
683 {
684 assert(addr_format == nir_address_format_64bit_bounded_global);
685 assert(addr->num_components == 4);
686 return nir_ige(b, nir_channel(b, addr, 2),
687 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
688 }
689
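/*
 * Emit the load that replaces a load_deref once the address is explicit:
 * load_ubo, load_ssbo, load_global or load_kernel_input depending on the
 * variable mode and address format.  For bounds-checked formats the load
 * is predicated on the address being in bounds and out-of-bounds reads
 * return zero.
 */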
690 static nir_ssa_def *
691 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
692 nir_ssa_def *addr, nir_address_format addr_format,
693 unsigned num_components)
694 {
695 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
696
697 nir_intrinsic_op op;
698 switch (mode) {
699 case nir_var_mem_ubo:
700 op = nir_intrinsic_load_ubo;
701 break;
702 case nir_var_mem_ssbo:
703 if (addr_format_is_global(addr_format))
704 op = nir_intrinsic_load_global;
705 else
706 op = nir_intrinsic_load_ssbo;
707 break;
708 case nir_var_mem_global:
709 assert(addr_format_is_global(addr_format));
710 op = nir_intrinsic_load_global;
711 break;
712 case nir_var_shader_in:
713 assert(addr_format_is_global(addr_format));
714 op = nir_intrinsic_load_kernel_input;
715 break;
716 default:
717 unreachable("Unsupported explicit IO variable mode");
718 }
719
720 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
721
722 if (addr_format_is_global(addr_format)) {
723 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
724 } else {
725 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
726 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
727 }
728
729 if (mode != nir_var_mem_ubo && mode != nir_var_shader_in)
730 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
731
732 /* TODO: We should try to provide a better alignment. For OpenCL, we need
733 * to plumb the alignment through from SPIR-V when we have one.
734 */
735 nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);
736
737 assert(intrin->dest.is_ssa);
738 load->num_components = num_components;
739 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
740 intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
741
742 assert(load->dest.ssa.bit_size % 8 == 0);
743
744 if (addr_format_needs_bounds_check(addr_format)) {
745 /* The Vulkan spec for robustBufferAccess gives us quite a few options
746 * as to what we can do with an OOB read. Unfortunately, returning
747 * undefined values isn't one of them, so we return an actual zero.
748 */
749 nir_const_value zero_val;
750 memset(&zero_val, 0, sizeof(zero_val));
751 nir_ssa_def *zero = nir_build_imm(b, load->num_components,
752 load->dest.ssa.bit_size, zero_val);
753
754 const unsigned load_size =
755 (load->dest.ssa.bit_size / 8) * load->num_components;
756 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
757
758 nir_builder_instr_insert(b, &load->instr);
759
760 nir_pop_if(b, NULL);
761
762 return nir_if_phi(b, &load->dest.ssa, zero);
763 } else {
764 nir_builder_instr_insert(b, &load->instr);
765 return &load->dest.ssa;
766 }
767 }
768
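/*
 * Store counterpart of build_explicit_io_load().  For bounds-checked
 * address formats, out-of-bounds stores are simply discarded.
 */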
769 static void
770 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
771 nir_ssa_def *addr, nir_address_format addr_format,
772 nir_ssa_def *value, nir_component_mask_t write_mask)
773 {
774 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
775
776 nir_intrinsic_op op;
777 switch (mode) {
778 case nir_var_mem_ssbo:
779 if (addr_format_is_global(addr_format))
780 op = nir_intrinsic_store_global;
781 else
782 op = nir_intrinsic_store_ssbo;
783 break;
784 case nir_var_mem_global:
785 assert(addr_format_is_global(addr_format));
786 op = nir_intrinsic_store_global;
787 break;
788 default:
789 unreachable("Unsupported explicit IO variable mode");
790 }
791
792 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
793
794 store->src[0] = nir_src_for_ssa(value);
795 if (addr_format_is_global(addr_format)) {
796 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
797 } else {
798 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
799 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
800 }
801
802 nir_intrinsic_set_write_mask(store, write_mask);
803
804 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
805
806 /* TODO: We should try to provide a better alignment. For OpenCL, we need
807 * to plumb the alignment through from SPIR-V when we have one.
808 */
809 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
810
811 assert(value->num_components == 1 ||
812 value->num_components == intrin->num_components);
813 store->num_components = value->num_components;
814
815 assert(value->bit_size % 8 == 0);
816
817 if (addr_format_needs_bounds_check(addr_format)) {
818 const unsigned store_size = (value->bit_size / 8) * store->num_components;
819 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
820
821 nir_builder_instr_insert(b, &store->instr);
822
823 nir_pop_if(b, NULL);
824 } else {
825 nir_builder_instr_insert(b, &store->instr);
826 }
827 }
828
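/*
 * Atomic counterpart of build_explicit_io_load().  For bounds-checked
 * address formats, out-of-bounds atomics yield an undefined value.
 */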
829 static nir_ssa_def *
830 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
831 nir_ssa_def *addr, nir_address_format addr_format)
832 {
833 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
834 const unsigned num_data_srcs =
835 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
836
837 nir_intrinsic_op op;
838 switch (mode) {
839 case nir_var_mem_ssbo:
840 if (addr_format_is_global(addr_format))
841 op = global_atomic_for_deref(intrin->intrinsic);
842 else
843 op = ssbo_atomic_for_deref(intrin->intrinsic);
844 break;
845 case nir_var_mem_global:
846 assert(addr_format_is_global(addr_format));
847 op = global_atomic_for_deref(intrin->intrinsic);
848 break;
849 default:
850 unreachable("Unsupported explicit IO variable mode");
851 }
852
853 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
854
855 unsigned src = 0;
856 if (addr_format_is_global(addr_format)) {
857 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
858 } else {
859 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
860 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
861 }
862 for (unsigned i = 0; i < num_data_srcs; i++) {
863 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
864 }
865
866 assert(intrin->dest.ssa.num_components == 1);
867 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
868 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
869
870 assert(atomic->dest.ssa.bit_size % 8 == 0);
871
872 if (addr_format_needs_bounds_check(addr_format)) {
873 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
874 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
875
876 nir_builder_instr_insert(b, &atomic->instr);
877
878 nir_pop_if(b, NULL);
879 return nir_if_phi(b, &atomic->dest.ssa,
880 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
881 } else {
882 nir_builder_instr_insert(b, &atomic->instr);
883 return &atomic->dest.ssa;
884 }
885 }
886
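/*
 * Replace a single deref instruction with explicit address arithmetic in
 * the chosen format: variables become their driver_location, array and
 * ptr_as_array derefs add index * stride, struct derefs add the field
 * offset, and casts pass the parent address through unchanged.
 */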
887 static void
888 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
889 nir_address_format addr_format)
890 {
891 /* Just delete the deref if it's not used. We can't use
892 * nir_deref_instr_remove_if_unused here because it may remove more than
893 * one deref which could break our list walking since we walk the list
894 * backwards.
895 */
896 assert(list_empty(&deref->dest.ssa.if_uses));
897 if (list_empty(&deref->dest.ssa.uses)) {
898 nir_instr_remove(&deref->instr);
899 return;
900 }
901
902 b->cursor = nir_after_instr(&deref->instr);
903
904 nir_ssa_def *parent_addr = NULL;
905 if (deref->deref_type != nir_deref_type_var) {
906 assert(deref->parent.is_ssa);
907 parent_addr = deref->parent.ssa;
908 }
909
910
911 nir_ssa_def *addr = NULL;
912 assert(deref->dest.is_ssa);
913 switch (deref->deref_type) {
914 case nir_deref_type_var:
915 assert(deref->mode == nir_var_shader_in);
916 addr = nir_imm_intN_t(b, deref->var->data.driver_location,
917 deref->dest.ssa.bit_size);
918 break;
919
920 case nir_deref_type_array: {
921 nir_deref_instr *parent = nir_deref_instr_parent(deref);
922
923 unsigned stride = glsl_get_explicit_stride(parent->type);
924 if ((glsl_type_is_matrix(parent->type) &&
925 glsl_matrix_type_is_row_major(parent->type)) ||
926 (glsl_type_is_vector(parent->type) && stride == 0))
927 stride = type_scalar_size_bytes(parent->type);
928
929 assert(stride > 0);
930
931 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
932 index = nir_i2i(b, index, parent_addr->bit_size);
933 addr = build_addr_iadd(b, parent_addr, addr_format,
934 nir_imul_imm(b, index, stride));
935 break;
936 }
937
938 case nir_deref_type_ptr_as_array: {
939 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
940 index = nir_i2i(b, index, parent_addr->bit_size);
941 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
942 addr = build_addr_iadd(b, parent_addr, addr_format,
943 nir_imul_imm(b, index, stride));
944 break;
945 }
946
947 case nir_deref_type_array_wildcard:
948 unreachable("Wildcards should be lowered by now");
949 break;
950
951 case nir_deref_type_struct: {
952 nir_deref_instr *parent = nir_deref_instr_parent(deref);
953 int offset = glsl_get_struct_field_offset(parent->type,
954 deref->strct.index);
955 assert(offset >= 0);
956 addr = build_addr_iadd_imm(b, parent_addr, addr_format, offset);
957 break;
958 }
959
960 case nir_deref_type_cast:
961 /* Nothing to do here */
962 addr = parent_addr;
963 break;
964 }
965
966 nir_instr_remove(&deref->instr);
967 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
968 }
969
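/*
 * Rewrite a deref-based load/store/atomic into the explicit-address form.
 * When the dereferenced vector has an explicit stride larger than its
 * scalar size (as can happen with row-major matrices), the access is split
 * into per-component operations at addr + i * vec_stride.
 */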
970 static void
971 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
972 nir_address_format addr_format)
973 {
974 b->cursor = nir_after_instr(&intrin->instr);
975
976 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
977 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
978 unsigned scalar_size = type_scalar_size_bytes(deref->type);
979 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
980 assert(vec_stride == 0 || vec_stride >= scalar_size);
981
982 nir_ssa_def *addr = &deref->dest.ssa;
983 if (intrin->intrinsic == nir_intrinsic_load_deref) {
984 nir_ssa_def *value;
985 if (vec_stride > scalar_size) {
986 nir_ssa_def *comps[4] = { NULL, };
987 for (unsigned i = 0; i < intrin->num_components; i++) {
988 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
989 vec_stride * i);
990 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
991 addr_format, 1);
992 }
993 value = nir_vec(b, comps, intrin->num_components);
994 } else {
995 value = build_explicit_io_load(b, intrin, addr, addr_format,
996 intrin->num_components);
997 }
998 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
999 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
1000 assert(intrin->src[1].is_ssa);
1001 nir_ssa_def *value = intrin->src[1].ssa;
1002 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1003 if (vec_stride > scalar_size) {
1004 for (unsigned i = 0; i < intrin->num_components; i++) {
1005 if (!(write_mask & (1 << i)))
1006 continue;
1007
1008 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1009 vec_stride * i);
1010 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1011 nir_channel(b, value, i), 1);
1012 }
1013 } else {
1014 build_explicit_io_store(b, intrin, addr, addr_format,
1015 value, write_mask);
1016 }
1017 } else {
1018 nir_ssa_def *value =
1019 build_explicit_io_atomic(b, intrin, addr, addr_format);
1020 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1021 }
1022
1023 nir_instr_remove(&intrin->instr);
1024 }
1025
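/*
 * Lower deref_buffer_array_length (the unsized, runtime-sized array length
 * query) to get_buffer_size arithmetic:
 *
 *    length = (buffer_size - array_offset) / array_stride
 *
 * e.g. a runtime array with a 16-byte stride starting at offset 32 in a
 * 112-byte buffer has (112 - 32) / 16 = 5 elements.
 */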
1026 static void
1027 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1028 nir_address_format addr_format)
1029 {
1030 b->cursor = nir_after_instr(&intrin->instr);
1031
1032 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1033
1034 assert(glsl_type_is_array(deref->type));
1035 assert(glsl_get_length(deref->type) == 0);
1036 unsigned stride = glsl_get_explicit_stride(deref->type);
1037 assert(stride > 0);
1038
1039 assert(addr_format == nir_address_format_32bit_index_offset);
1040 nir_ssa_def *addr = &deref->dest.ssa;
1041 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1042 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1043
1044 nir_intrinsic_instr *bsize =
1045 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
1046 bsize->src[0] = nir_src_for_ssa(index);
1047 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1048 nir_builder_instr_insert(b, &bsize->instr);
1049
1050 nir_ssa_def *arr_size =
1051 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
1052 nir_imm_int(b, stride));
1053
1054 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
1055 nir_instr_remove(&intrin->instr);
1056 }
1057
1058 static bool
1059 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
1060 nir_address_format addr_format)
1061 {
1062 bool progress = false;
1063
1064 nir_builder b;
1065 nir_builder_init(&b, impl);
1066
1067 /* Walk in reverse order so that we can see the full deref chain when we
1068 * lower the access operations. We lower them assuming that the derefs
1069 * will be turned into address calculations later.
1070 */
1071 nir_foreach_block_reverse(block, impl) {
1072 nir_foreach_instr_reverse_safe(instr, block) {
1073 switch (instr->type) {
1074 case nir_instr_type_deref: {
1075 nir_deref_instr *deref = nir_instr_as_deref(instr);
1076 if (deref->mode & modes) {
1077 lower_explicit_io_deref(&b, deref, addr_format);
1078 progress = true;
1079 }
1080 break;
1081 }
1082
1083 case nir_instr_type_intrinsic: {
1084 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1085 switch (intrin->intrinsic) {
1086 case nir_intrinsic_load_deref:
1087 case nir_intrinsic_store_deref:
1088 case nir_intrinsic_deref_atomic_add:
1089 case nir_intrinsic_deref_atomic_imin:
1090 case nir_intrinsic_deref_atomic_umin:
1091 case nir_intrinsic_deref_atomic_imax:
1092 case nir_intrinsic_deref_atomic_umax:
1093 case nir_intrinsic_deref_atomic_and:
1094 case nir_intrinsic_deref_atomic_or:
1095 case nir_intrinsic_deref_atomic_xor:
1096 case nir_intrinsic_deref_atomic_exchange:
1097 case nir_intrinsic_deref_atomic_comp_swap:
1098 case nir_intrinsic_deref_atomic_fadd:
1099 case nir_intrinsic_deref_atomic_fmin:
1100 case nir_intrinsic_deref_atomic_fmax:
1101 case nir_intrinsic_deref_atomic_fcomp_swap: {
1102 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1103 if (deref->mode & modes) {
1104 lower_explicit_io_access(&b, intrin, addr_format);
1105 progress = true;
1106 }
1107 break;
1108 }
1109
1110 case nir_intrinsic_deref_buffer_array_length: {
1111 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1112 if (deref->mode & modes) {
1113 lower_explicit_io_array_length(&b, intrin, addr_format);
1114 progress = true;
1115 }
1116 break;
1117 }
1118
1119 default:
1120 break;
1121 }
1122 break;
1123 }
1124
1125 default:
1126 /* Nothing to do */
1127 break;
1128 }
1129 }
1130 }
1131
1132 if (progress) {
1133 nir_metadata_preserve(impl, nir_metadata_block_index |
1134 nir_metadata_dominance);
1135 }
1136
1137 return progress;
1138 }
1139
1140 bool
1141 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1142 nir_address_format addr_format)
1143 {
1144 bool progress = false;
1145
1146 nir_foreach_function(function, shader) {
1147 if (function->impl &&
1148 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1149 progress = true;
1150 }
1151
1152 return progress;
1153 }
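/*
 * Typical driver usage (a sketch; the modes, the type_size callback and
 * the chosen address format are driver-specific assumptions, not
 * requirements of these passes):
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 driver_type_size_vec4, 0);
 *    nir_lower_explicit_io(shader, nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 *
 * where driver_type_size_vec4 stands in for whatever callback with the
 * int (*)(const struct glsl_type *) signature the driver uses.
 */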
1154
1155 /**
1156 * Return the offset source for a load/store intrinsic.
1157 */
1158 nir_src *
1159 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1160 {
1161 switch (instr->intrinsic) {
1162 case nir_intrinsic_load_input:
1163 case nir_intrinsic_load_output:
1164 case nir_intrinsic_load_shared:
1165 case nir_intrinsic_load_uniform:
1166 case nir_intrinsic_load_global:
1167 return &instr->src[0];
1168 case nir_intrinsic_load_ubo:
1169 case nir_intrinsic_load_ssbo:
1170 case nir_intrinsic_load_per_vertex_input:
1171 case nir_intrinsic_load_per_vertex_output:
1172 case nir_intrinsic_load_interpolated_input:
1173 case nir_intrinsic_store_output:
1174 case nir_intrinsic_store_shared:
1175 case nir_intrinsic_store_global:
1176 return &instr->src[1];
1177 case nir_intrinsic_store_ssbo:
1178 case nir_intrinsic_store_per_vertex_output:
1179 return &instr->src[2];
1180 default:
1181 return NULL;
1182 }
1183 }
1184
1185 /**
1186 * Return the vertex index source for a load/store per_vertex intrinsic.
1187 */
1188 nir_src *
1189 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1190 {
1191 switch (instr->intrinsic) {
1192 case nir_intrinsic_load_per_vertex_input:
1193 case nir_intrinsic_load_per_vertex_output:
1194 return &instr->src[0];
1195 case nir_intrinsic_store_per_vertex_output:
1196 return &instr->src[1];
1197 default:
1198 return NULL;
1199 }
1200 }