nir/lower_io: Add 32 and 64-bit global address formats
src/compiler/nir/nir_lower_io.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variable
 * derefs into the corresponding input/output intrinsics.
 */
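
/*
 * For example, a load_deref of a fragment-shader input variable becomes a
 * load_input (or load_interpolated_input) intrinsic whose "base" index is
 * the variable's driver_location and whose offset source is computed from
 * the rest of the deref chain.
 */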

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid global atomic");
   }
}

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}
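
/* Example (illustrative only): a driver might pack all uniform locations with
 * this helper, where my_type_size() stands in for the driver's own callback
 * that returns how many location slots a GLSL type occupies:
 *
 *    nir_assign_var_locations(&shader->uniforms, &shader->num_uniforms,
 *                             my_type_size);
 */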

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as geometry shader inputs).
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type);

         nir_ssa_def *mul =
            nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i));
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
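
/* For example (illustrative only), a deref chain like a[i].f[j] yields an
 * offset of roughly
 *
 *    i * type_size(element of a)
 *       + sum of type_size() over the struct fields before f
 *       + j * type_size(element of f)
 *
 * emitted as NIR ALU ops, so constant indices fold down to an immediate.
 */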

static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_mem_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_mem_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[1], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_variable *var, nir_ssa_def *offset)
{
   assert(var->data.mode == nir_var_mem_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
          nir_intrinsic_infos[op].num_srcs);
   for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component)
{
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, var, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
         /* fallthrough */
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_mem_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, vertex_index, var, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_deref:
         replacement = lower_store(intrin, state, vertex_index, var, offset,
                                   component_offset);
         break;

      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, var, offset);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
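
/* Example (illustrative only): lowering all vertex-shader outputs while
 * counting sizes in vec4 slots via the core glsl_count_attribute_slots()
 * helper; real drivers substitute their own type_size callback:
 *
 *    static int
 *    count_vec4_slots(const struct glsl_type *type)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_lower_io(shader, nir_var_shader_out, count_vec4_slots, 0);
 */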

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format, nir_ssa_def *offset)
{
   assert(offset->num_components == 1);
   assert(addr->bit_size == offset->bit_size);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_vk_index_offset:
      assert(addr->num_components == 2);
      return nir_vec2(b, nir_channel(b, addr, 0),
                         nir_iadd(b, nir_channel(b, addr, 1), offset));
   }
   unreachable("Invalid address format");
}
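
/* The address formats handled above determine what an SSA "pointer" looks
 * like for explicit I/O:
 *
 *    nir_address_format_32bit_global:    one 32-bit raw global address
 *    nir_address_format_64bit_global:    one 64-bit raw global address
 *    nir_address_format_vk_index_offset: a vec2 of (binding index, byte offset)
 *
 * For example (illustrative only), advancing a pointer past a 16-byte field:
 *
 *    nir_ssa_def *next = build_addr_iadd_imm(b, addr, addr_format, 16);
 *
 * is a plain integer add for the global formats, but only touches the offset
 * component for nir_address_format_vk_index_offset.
 */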

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format, int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format,
                          nir_imm_intN_t(b, offset, addr->bit_size));
}

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_vk_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 0);
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_vk_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 1);
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format)
{
   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_64bit_global;
}

static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_vk_index_offset:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       unsigned num_components)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ubo:
      op = nir_intrinsic_load_ubo;
      break;
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = nir_intrinsic_load_global;
      else
         op = nir_intrinsic_load_ssbo;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   if (addr_format_is_global(addr_format)) {
      assert(op == nir_intrinsic_load_global);
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (mode != nir_var_mem_ubo)
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   /* TODO: We should try to provide a better alignment.  For OpenCL, we need
    * to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = nir_intrinsic_store_global;
      else
         op = nir_intrinsic_store_ssbo;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   /* TODO: We should try to provide a better alignment.  For OpenCL, we need
    * to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(store, value->bit_size / 8, 0);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;
   nir_builder_instr_insert(b, &store->instr);
}

static nir_ssa_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_ssa_def *addr, nir_address_format addr_format)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format))
         op = global_atomic_for_deref(intrin->intrinsic);
      else
         op = ssbo_atomic_for_deref(intrin->intrinsic);
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);

   unsigned src = 0;
   if (addr_format_is_global(addr_format)) {
      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
   for (unsigned i = 0; i < num_data_srcs; i++) {
      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   assert(intrin->dest.ssa.num_components == 1);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                     1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
   nir_builder_instr_insert(b, &atomic->instr);

   return &atomic->dest.ssa;
}

static void
lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
                        nir_address_format addr_format)
{
   /* Just delete the deref if it's not used.  We can't use
    * nir_deref_instr_remove_if_unused here because it may remove more than
    * one deref which could break our list walking since we walk the list
    * backwards.
    */
   assert(list_empty(&deref->dest.ssa.if_uses));
   if (list_empty(&deref->dest.ssa.uses)) {
      nir_instr_remove(&deref->instr);
      return;
   }

   b->cursor = nir_after_instr(&deref->instr);

   /* Var derefs must be lowered away by the driver */
   assert(deref->deref_type != nir_deref_type_var);

   assert(deref->parent.is_ssa);
   nir_ssa_def *parent_addr = deref->parent.ssa;

   nir_ssa_def *addr = NULL;
   assert(deref->dest.is_ssa);
   switch (deref->deref_type) {
   case nir_deref_type_var:
      unreachable("Must be lowered by the driver");
      break;

   case nir_deref_type_array: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      unsigned stride = glsl_get_explicit_stride(parent->type);
      if ((glsl_type_is_matrix(parent->type) &&
           glsl_matrix_type_is_row_major(parent->type)) ||
          (glsl_type_is_vector(parent->type) && stride == 0))
         stride = type_scalar_size_bytes(parent->type);

      assert(stride > 0);

      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, parent_addr->bit_size);
      addr = build_addr_iadd(b, parent_addr, addr_format,
                             nir_imul_imm(b, index, stride));
      break;
   }

   case nir_deref_type_ptr_as_array: {
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, parent_addr->bit_size);
      unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
      addr = build_addr_iadd(b, parent_addr, addr_format,
                             nir_imul_imm(b, index, stride));
      break;
   }

   case nir_deref_type_array_wildcard:
      unreachable("Wildcards should be lowered by now");
      break;

   case nir_deref_type_struct: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      int offset = glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
      assert(offset >= 0);
      addr = build_addr_iadd_imm(b, parent_addr, addr_format, offset);
      break;
   }

   case nir_deref_type_cast:
      /* Nothing to do here */
      addr = parent_addr;
      break;
   }

   nir_instr_remove(&deref->instr);
   nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
}
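
/* For example (illustrative only), with nir_address_format_64bit_global a
 * chain of derefs such as "struct member at byte offset 16" followed by
 * "array element i with a 4-byte stride" becomes
 *
 *    addr = base + 16 + i * 4
 *
 * built out of 64-bit nir_iadd/nir_imul instructions by the cases above.
 */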

static void
lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
   unsigned scalar_size = type_scalar_size_bytes(deref->type);
   assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
   assert(vec_stride == 0 || vec_stride >= scalar_size);

   nir_ssa_def *addr = &deref->dest.ssa;
   if (intrin->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *value;
      if (vec_stride > scalar_size) {
         nir_ssa_def *comps[4] = { NULL, };
         for (unsigned i = 0; i < intrin->num_components; i++) {
            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
                                              addr_format, 1);
         }
         value = nir_vec(b, comps, intrin->num_components);
      } else {
         value = build_explicit_io_load(b, intrin, addr, addr_format,
                                        intrin->num_components);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      if (vec_stride > scalar_size) {
         for (unsigned i = 0; i < intrin->num_components; i++) {
            if (!(write_mask & (1 << i)))
               continue;

            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            build_explicit_io_store(b, intrin, comp_addr, addr_format,
                                    nir_channel(b, value, i), 1);
         }
      } else {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 value, write_mask);
      }
   } else {
      nir_ssa_def *value =
         build_explicit_io_atomic(b, intrin, addr, addr_format);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   }

   nir_instr_remove(&intrin->instr);
}

static bool
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
                           nir_address_format addr_format)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Walk in reverse order so that we can see the full deref chain when we
    * lower the access operations.  We lower them assuming that the derefs
    * will be turned into address calculations later.
    */
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes) {
               lower_explicit_io_deref(&b, deref, addr_format);
               progress = true;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
            case nir_intrinsic_store_deref:
            case nir_intrinsic_deref_atomic_add:
            case nir_intrinsic_deref_atomic_imin:
            case nir_intrinsic_deref_atomic_umin:
            case nir_intrinsic_deref_atomic_imax:
            case nir_intrinsic_deref_atomic_umax:
            case nir_intrinsic_deref_atomic_and:
            case nir_intrinsic_deref_atomic_or:
            case nir_intrinsic_deref_atomic_xor:
            case nir_intrinsic_deref_atomic_exchange:
            case nir_intrinsic_deref_atomic_comp_swap:
            case nir_intrinsic_deref_atomic_fadd:
            case nir_intrinsic_deref_atomic_fmin:
            case nir_intrinsic_deref_atomic_fmax:
            case nir_intrinsic_deref_atomic_fcomp_swap: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (deref->mode & modes) {
                  lower_explicit_io_access(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                      nir_address_format addr_format)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
         progress = true;
   }

   return progress;
}
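
/* Example (illustrative only): a Vulkan-style driver might lower UBO and
 * SSBO access with the vec2 index/offset format, while a driver with real
 * pointers to memory could lower SSBO access with the 64-bit global format:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_vk_index_offset);
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ssbo,
 *                          nir_address_format_64bit_global);
 */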

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_global:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}