nir: Rename nir_address_format_vk_index_offset to not be vk
src/compiler/nir/nir_lower_io.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts loads and stores of input/output variables
31 * into the actual input/output intrinsics.
32 */
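/*
 * A minimal usage sketch (illustrative only): the shader pointer "s" and the
 * type_size_vec4() callback below are driver-provided assumptions, not
 * defined in this file.
 *
 *    nir_assign_var_locations(&s->inputs, &s->num_inputs, type_size_vec4);
 *    nir_assign_var_locations(&s->outputs, &s->num_outputs, type_size_vec4);
 *    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
 *               type_size_vec4, (nir_lower_io_options)0);
 */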
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 struct lower_io_state {
39 void *dead_ctx;
40 nir_builder builder;
41 int (*type_size)(const struct glsl_type *type);
42 nir_variable_mode modes;
43 nir_lower_io_options options;
44 };
45
46 static nir_intrinsic_op
47 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
48 {
49 switch (deref_op) {
50 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
51 OP(atomic_exchange)
52 OP(atomic_comp_swap)
53 OP(atomic_add)
54 OP(atomic_imin)
55 OP(atomic_umin)
56 OP(atomic_imax)
57 OP(atomic_umax)
58 OP(atomic_and)
59 OP(atomic_or)
60 OP(atomic_xor)
61 OP(atomic_fadd)
62 OP(atomic_fmin)
63 OP(atomic_fmax)
64 OP(atomic_fcomp_swap)
65 #undef OP
66 default:
67 unreachable("Invalid SSBO atomic");
68 }
69 }
70
71 static nir_intrinsic_op
72 global_atomic_for_deref(nir_intrinsic_op deref_op)
73 {
74 switch (deref_op) {
75 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
76 OP(atomic_exchange)
77 OP(atomic_comp_swap)
78 OP(atomic_add)
79 OP(atomic_imin)
80 OP(atomic_umin)
81 OP(atomic_imax)
82 OP(atomic_umax)
83 OP(atomic_and)
84 OP(atomic_or)
85 OP(atomic_xor)
86 OP(atomic_fadd)
87 OP(atomic_fmin)
88 OP(atomic_fmax)
89 OP(atomic_fcomp_swap)
90 #undef OP
91 default:
92 unreachable("Invalid global atomic");
93 }
94 }
95
96 void
97 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
98 int (*type_size)(const struct glsl_type *))
99 {
100 unsigned location = 0;
101
102 nir_foreach_variable(var, var_list) {
103 /*
104 * UBOs and SSBOs have their own address spaces, so don't count them
105 * towards the number of global uniforms.
106 */
107 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
108 continue;
109
110 var->data.driver_location = location;
111 location += type_size(var->type);
112 }
113
114 *size = location;
115 }
116
117 /**
118 * Return true if the given variable is a per-vertex input/output array
119 * (such as geometry shader inputs).
120 */
121 bool
122 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
123 {
124 if (var->data.patch || !glsl_type_is_array(var->type))
125 return false;
126
127 if (var->data.mode == nir_var_shader_in)
128 return stage == MESA_SHADER_GEOMETRY ||
129 stage == MESA_SHADER_TESS_CTRL ||
130 stage == MESA_SHADER_TESS_EVAL;
131
132 if (var->data.mode == nir_var_shader_out)
133 return stage == MESA_SHADER_TESS_CTRL;
134
135 return false;
136 }
137
138 static nir_ssa_def *
139 get_io_offset(nir_builder *b, nir_deref_instr *deref,
140 nir_ssa_def **vertex_index,
141 int (*type_size)(const struct glsl_type *),
142 unsigned *component)
143 {
144 nir_deref_path path;
145 nir_deref_path_init(&path, deref, NULL);
146
147 assert(path.path[0]->deref_type == nir_deref_type_var);
148 nir_deref_instr **p = &path.path[1];
149
150 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
151 * outermost array index separate. Process the rest normally.
152 */
153 if (vertex_index != NULL) {
154 assert((*p)->deref_type == nir_deref_type_array);
155 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
156 p++;
157 }
158
159 if (path.path[0]->var->data.compact) {
160 assert((*p)->deref_type == nir_deref_type_array);
161 assert(glsl_type_is_scalar((*p)->type));
162
163 /* We always lower indirect dereferences for "compact" array vars. */
164 const unsigned index = nir_src_as_uint((*p)->arr.index);
165 const unsigned total_offset = *component + index;
166 const unsigned slot_offset = total_offset / 4;
167 *component = total_offset % 4;
168 return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
169 }
170
171 /* Just emit code and let constant-folding go to town */
172 nir_ssa_def *offset = nir_imm_int(b, 0);
173
174 for (; *p; p++) {
175 if ((*p)->deref_type == nir_deref_type_array) {
176 unsigned size = type_size((*p)->type);
177
178 nir_ssa_def *mul =
179 nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
180
181 offset = nir_iadd(b, offset, mul);
182 } else if ((*p)->deref_type == nir_deref_type_struct) {
183 /* p starts at path[1], so this is safe */
184 nir_deref_instr *parent = *(p - 1);
185
186 unsigned field_offset = 0;
187 for (unsigned i = 0; i < (*p)->strct.index; i++) {
188 field_offset += type_size(glsl_get_struct_field(parent->type, i));
189 }
190 offset = nir_iadd_imm(b, offset, field_offset);
191 } else {
192 unreachable("Unsupported deref type");
193 }
194 }
195
196 nir_deref_path_finish(&path);
197
198 return offset;
199 }
200
201 static nir_intrinsic_instr *
202 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
203 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
204 unsigned component)
205 {
206 const nir_shader *nir = state->builder.shader;
207 nir_variable_mode mode = var->data.mode;
208 nir_ssa_def *barycentric = NULL;
209
210 nir_intrinsic_op op;
211 switch (mode) {
212 case nir_var_shader_in:
213 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
214 nir->options->use_interpolated_input_intrinsics &&
215 var->data.interpolation != INTERP_MODE_FLAT) {
216 assert(vertex_index == NULL);
217
218 nir_intrinsic_op bary_op;
219 if (var->data.sample ||
220 (state->options & nir_lower_io_force_sample_interpolation))
221 bary_op = nir_intrinsic_load_barycentric_sample;
222 else if (var->data.centroid)
223 bary_op = nir_intrinsic_load_barycentric_centroid;
224 else
225 bary_op = nir_intrinsic_load_barycentric_pixel;
226
227 barycentric = nir_load_barycentric(&state->builder, bary_op,
228 var->data.interpolation);
229 op = nir_intrinsic_load_interpolated_input;
230 } else {
231 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
232 nir_intrinsic_load_input;
233 }
234 break;
235 case nir_var_shader_out:
236 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
237 nir_intrinsic_load_output;
238 break;
239 case nir_var_uniform:
240 op = nir_intrinsic_load_uniform;
241 break;
242 case nir_var_mem_shared:
243 op = nir_intrinsic_load_shared;
244 break;
245 default:
246 unreachable("Unknown variable mode");
247 }
248
249 nir_intrinsic_instr *load =
250 nir_intrinsic_instr_create(state->builder.shader, op);
251 load->num_components = intrin->num_components;
252
253 nir_intrinsic_set_base(load, var->data.driver_location);
254 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
255 nir_intrinsic_set_component(load, component);
256
257 if (load->intrinsic == nir_intrinsic_load_uniform)
258 nir_intrinsic_set_range(load, state->type_size(var->type));
259
260 if (vertex_index) {
261 load->src[0] = nir_src_for_ssa(vertex_index);
262 load->src[1] = nir_src_for_ssa(offset);
263 } else if (barycentric) {
264 load->src[0] = nir_src_for_ssa(barycentric);
265 load->src[1] = nir_src_for_ssa(offset);
266 } else {
267 load->src[0] = nir_src_for_ssa(offset);
268 }
269
270 return load;
271 }
272
273 static nir_intrinsic_instr *
274 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
275 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
276 unsigned component)
277 {
278 nir_variable_mode mode = var->data.mode;
279
280 nir_intrinsic_op op;
281 if (mode == nir_var_mem_shared) {
282 op = nir_intrinsic_store_shared;
283 } else {
284 assert(mode == nir_var_shader_out);
285 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
286 nir_intrinsic_store_output;
287 }
288
289 nir_intrinsic_instr *store =
290 nir_intrinsic_instr_create(state->builder.shader, op);
291 store->num_components = intrin->num_components;
292
293 nir_src_copy(&store->src[0], &intrin->src[1], store);
294
295 nir_intrinsic_set_base(store, var->data.driver_location);
296
297 if (mode == nir_var_shader_out)
298 nir_intrinsic_set_component(store, component);
299
300 nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
301
302 if (vertex_index)
303 store->src[1] = nir_src_for_ssa(vertex_index);
304
305 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
306
307 return store;
308 }
309
310 static nir_intrinsic_instr *
311 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
312 nir_variable *var, nir_ssa_def *offset)
313 {
314 assert(var->data.mode == nir_var_mem_shared);
315
316 nir_intrinsic_op op;
317 switch (intrin->intrinsic) {
318 #define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
319 OP(atomic_exchange)
320 OP(atomic_comp_swap)
321 OP(atomic_add)
322 OP(atomic_imin)
323 OP(atomic_umin)
324 OP(atomic_imax)
325 OP(atomic_umax)
326 OP(atomic_and)
327 OP(atomic_or)
328 OP(atomic_xor)
329 OP(atomic_fadd)
330 OP(atomic_fmin)
331 OP(atomic_fmax)
332 OP(atomic_fcomp_swap)
333 #undef OP
334 default:
335 unreachable("Invalid atomic");
336 }
337
338 nir_intrinsic_instr *atomic =
339 nir_intrinsic_instr_create(state->builder.shader, op);
340
341 nir_intrinsic_set_base(atomic, var->data.driver_location);
342
343 atomic->src[0] = nir_src_for_ssa(offset);
344 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
345 nir_intrinsic_infos[op].num_srcs);
346 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
347 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
348 }
349
350 return atomic;
351 }
352
353 static nir_intrinsic_instr *
354 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
355 nir_variable *var, nir_ssa_def *offset, unsigned component)
356 {
357 assert(var->data.mode == nir_var_shader_in);
358
359 /* Ignore interpolateAt() for flat variables - flat is flat. */
360 if (var->data.interpolation == INTERP_MODE_FLAT)
361 return lower_load(intrin, state, NULL, var, offset, component);
362
363 nir_intrinsic_op bary_op;
364 switch (intrin->intrinsic) {
365 case nir_intrinsic_interp_deref_at_centroid:
366 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
367 nir_intrinsic_load_barycentric_sample :
368 nir_intrinsic_load_barycentric_centroid;
369 break;
370 case nir_intrinsic_interp_deref_at_sample:
371 bary_op = nir_intrinsic_load_barycentric_at_sample;
372 break;
373 case nir_intrinsic_interp_deref_at_offset:
374 bary_op = nir_intrinsic_load_barycentric_at_offset;
375 break;
376 default:
377 unreachable("Bogus interpolateAt() intrinsic.");
378 }
379
380 nir_intrinsic_instr *bary_setup =
381 nir_intrinsic_instr_create(state->builder.shader, bary_op);
382
383 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
384 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
385
386 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
387 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
388 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
389
390 nir_builder_instr_insert(&state->builder, &bary_setup->instr);
391
392 nir_intrinsic_instr *load =
393 nir_intrinsic_instr_create(state->builder.shader,
394 nir_intrinsic_load_interpolated_input);
395 load->num_components = intrin->num_components;
396
397 nir_intrinsic_set_base(load, var->data.driver_location);
398 nir_intrinsic_set_component(load, component);
399
400 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
401 load->src[1] = nir_src_for_ssa(offset);
402
403 return load;
404 }
405
406 static bool
407 nir_lower_io_block(nir_block *block,
408 struct lower_io_state *state)
409 {
410 nir_builder *b = &state->builder;
411 const nir_shader_compiler_options *options = b->shader->options;
412 bool progress = false;
413
414 nir_foreach_instr_safe(instr, block) {
415 if (instr->type != nir_instr_type_intrinsic)
416 continue;
417
418 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
419
420 switch (intrin->intrinsic) {
421 case nir_intrinsic_load_deref:
422 case nir_intrinsic_store_deref:
423 case nir_intrinsic_deref_atomic_add:
424 case nir_intrinsic_deref_atomic_imin:
425 case nir_intrinsic_deref_atomic_umin:
426 case nir_intrinsic_deref_atomic_imax:
427 case nir_intrinsic_deref_atomic_umax:
428 case nir_intrinsic_deref_atomic_and:
429 case nir_intrinsic_deref_atomic_or:
430 case nir_intrinsic_deref_atomic_xor:
431 case nir_intrinsic_deref_atomic_exchange:
432 case nir_intrinsic_deref_atomic_comp_swap:
433 case nir_intrinsic_deref_atomic_fadd:
434 case nir_intrinsic_deref_atomic_fmin:
435 case nir_intrinsic_deref_atomic_fmax:
436 case nir_intrinsic_deref_atomic_fcomp_swap:
437 /* We can lower the I/O for this NIR intrinsic */
438 break;
439 case nir_intrinsic_interp_deref_at_centroid:
440 case nir_intrinsic_interp_deref_at_sample:
441 case nir_intrinsic_interp_deref_at_offset:
442 /* We can optionally lower these to load_interpolated_input */
443 if (options->use_interpolated_input_intrinsics)
444 break;
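/* Otherwise, fall through and skip it like any other intrinsic we can't lower. */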
445 default:
446 /* We can't lower the I/O for this NIR intrinsic, so skip it */
447 continue;
448 }
449
450 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
451
452 nir_variable *var = nir_deref_instr_get_variable(deref);
453 nir_variable_mode mode = var->data.mode;
454
455 if ((state->modes & mode) == 0)
456 continue;
457
458 if (mode != nir_var_shader_in &&
459 mode != nir_var_shader_out &&
460 mode != nir_var_mem_shared &&
461 mode != nir_var_uniform)
462 continue;
463
464 b->cursor = nir_before_instr(instr);
465
466 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
467
468 nir_ssa_def *offset;
469 nir_ssa_def *vertex_index = NULL;
470 unsigned component_offset = var->data.location_frac;
471
472 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
473 state->type_size, &component_offset);
474
475 nir_intrinsic_instr *replacement;
476
477 switch (intrin->intrinsic) {
478 case nir_intrinsic_load_deref:
479 replacement = lower_load(intrin, state, vertex_index, var, offset,
480 component_offset);
481 break;
482
483 case nir_intrinsic_store_deref:
484 replacement = lower_store(intrin, state, vertex_index, var, offset,
485 component_offset);
486 break;
487
488 case nir_intrinsic_deref_atomic_add:
489 case nir_intrinsic_deref_atomic_imin:
490 case nir_intrinsic_deref_atomic_umin:
491 case nir_intrinsic_deref_atomic_imax:
492 case nir_intrinsic_deref_atomic_umax:
493 case nir_intrinsic_deref_atomic_and:
494 case nir_intrinsic_deref_atomic_or:
495 case nir_intrinsic_deref_atomic_xor:
496 case nir_intrinsic_deref_atomic_exchange:
497 case nir_intrinsic_deref_atomic_comp_swap:
498 case nir_intrinsic_deref_atomic_fadd:
499 case nir_intrinsic_deref_atomic_fmin:
500 case nir_intrinsic_deref_atomic_fmax:
501 case nir_intrinsic_deref_atomic_fcomp_swap:
502 assert(vertex_index == NULL);
503 replacement = lower_atomic(intrin, state, var, offset);
504 break;
505
506 case nir_intrinsic_interp_deref_at_centroid:
507 case nir_intrinsic_interp_deref_at_sample:
508 case nir_intrinsic_interp_deref_at_offset:
509 assert(vertex_index == NULL);
510 replacement = lower_interpolate_at(intrin, state, var, offset,
511 component_offset);
512 break;
513
514 default:
515 continue;
516 }
517
518 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
519 if (intrin->dest.is_ssa) {
520 nir_ssa_dest_init(&replacement->instr, &replacement->dest,
521 intrin->dest.ssa.num_components,
522 intrin->dest.ssa.bit_size, NULL);
523 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
524 nir_src_for_ssa(&replacement->dest.ssa));
525 } else {
526 nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
527 }
528 }
529
530 nir_instr_insert_before(&intrin->instr, &replacement->instr);
531 nir_instr_remove(&intrin->instr);
532 progress = true;
533 }
534
535 return progress;
536 }
537
538 static bool
539 nir_lower_io_impl(nir_function_impl *impl,
540 nir_variable_mode modes,
541 int (*type_size)(const struct glsl_type *),
542 nir_lower_io_options options)
543 {
544 struct lower_io_state state;
545 bool progress = false;
546
547 nir_builder_init(&state.builder, impl);
548 state.dead_ctx = ralloc_context(NULL);
549 state.modes = modes;
550 state.type_size = type_size;
551 state.options = options;
552
553 nir_foreach_block(block, impl) {
554 progress |= nir_lower_io_block(block, &state);
555 }
556
557 ralloc_free(state.dead_ctx);
558
559 nir_metadata_preserve(impl, nir_metadata_block_index |
560 nir_metadata_dominance);
561 return progress;
562 }
563
564 bool
565 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
566 int (*type_size)(const struct glsl_type *),
567 nir_lower_io_options options)
568 {
569 bool progress = false;
570
571 nir_foreach_function(function, shader) {
572 if (function->impl) {
573 progress |= nir_lower_io_impl(function->impl, modes,
574 type_size, options);
575 }
576 }
577
578 return progress;
579 }
580
581 static unsigned
582 type_scalar_size_bytes(const struct glsl_type *type)
583 {
584 assert(glsl_type_is_vector_or_scalar(type) ||
585 glsl_type_is_matrix(type));
586 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
587 }
588
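/*
 * Address layout per nir_address_format, as assumed by the helpers below:
 * 32bit_global and 64bit_global addresses are a single scalar holding a raw
 * byte address, while 32bit_index_offset addresses are a vec2 of
 * (buffer index, byte offset into that buffer).
 */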
589 static nir_ssa_def *
590 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
591 nir_address_format addr_format, nir_ssa_def *offset)
592 {
593 assert(offset->num_components == 1);
594 assert(addr->bit_size == offset->bit_size);
595
596 switch (addr_format) {
597 case nir_address_format_32bit_global:
598 case nir_address_format_64bit_global:
599 assert(addr->num_components == 1);
600 return nir_iadd(b, addr, offset);
601
602 case nir_address_format_32bit_index_offset:
603 assert(addr->num_components == 2);
604 return nir_vec2(b, nir_channel(b, addr, 0),
605 nir_iadd(b, nir_channel(b, addr, 1), offset));
606 }
607 unreachable("Invalid address format");
608 }
609
610 static nir_ssa_def *
611 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
612 nir_address_format addr_format, int64_t offset)
613 {
614 return build_addr_iadd(b, addr, addr_format,
615 nir_imm_intN_t(b, offset, addr->bit_size));
616 }
617
618 static nir_ssa_def *
619 addr_to_index(nir_builder *b, nir_ssa_def *addr,
620 nir_address_format addr_format)
621 {
622 assert(addr_format == nir_address_format_32bit_index_offset);
623 assert(addr->num_components == 2);
624 return nir_channel(b, addr, 0);
625 }
626
627 static nir_ssa_def *
628 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
629 nir_address_format addr_format)
630 {
631 assert(addr_format == nir_address_format_32bit_index_offset);
632 assert(addr->num_components == 2);
633 return nir_channel(b, addr, 1);
634 }
635
636 /** Returns true if the given address format resolves to a global address */
637 static bool
638 addr_format_is_global(nir_address_format addr_format)
639 {
640 return addr_format == nir_address_format_32bit_global ||
641 addr_format == nir_address_format_64bit_global;
642 }
643
644 static nir_ssa_def *
645 addr_to_global(nir_builder *b, nir_ssa_def *addr,
646 nir_address_format addr_format)
647 {
648 switch (addr_format) {
649 case nir_address_format_32bit_global:
650 case nir_address_format_64bit_global:
651 assert(addr->num_components == 1);
652 return addr;
653
654 case nir_address_format_32bit_index_offset:
655 unreachable("Cannot get a global address with this address format");
656 }
657
658 unreachable("Invalid address format");
659 }
660
661 static nir_ssa_def *
662 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
663 nir_ssa_def *addr, nir_address_format addr_format,
664 unsigned num_components)
665 {
666 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
667
668 nir_intrinsic_op op;
669 switch (mode) {
670 case nir_var_mem_ubo:
671 op = nir_intrinsic_load_ubo;
672 break;
673 case nir_var_mem_ssbo:
674 if (addr_format_is_global(addr_format))
675 op = nir_intrinsic_load_global;
676 else
677 op = nir_intrinsic_load_ssbo;
678 break;
679 case nir_var_mem_global:
680 assert(addr_format_is_global(addr_format));
681 op = nir_intrinsic_load_global;
682 break;
683 default:
684 unreachable("Unsupported explicit IO variable mode");
685 }
686
687 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
688
689 if (addr_format_is_global(addr_format)) {
690 assert(op == nir_intrinsic_load_global);
691 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
692 } else {
693 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
694 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
695 }
696
697 if (mode != nir_var_mem_ubo)
698 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
699
700 /* TODO: We should try and provide a better alignment. For OpenCL, we need
701 * to plumb the alignment through from SPIR-V when we have one.
702 */
703 nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);
704
705 assert(intrin->dest.is_ssa);
706 load->num_components = num_components;
707 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
708 intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
709 nir_builder_instr_insert(b, &load->instr);
710
711 return &load->dest.ssa;
712 }
713
714 static void
715 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
716 nir_ssa_def *addr, nir_address_format addr_format,
717 nir_ssa_def *value, nir_component_mask_t write_mask)
718 {
719 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
720
721 nir_intrinsic_op op;
722 switch (mode) {
723 case nir_var_mem_ssbo:
724 if (addr_format_is_global(addr_format))
725 op = nir_intrinsic_store_global;
726 else
727 op = nir_intrinsic_store_ssbo;
728 break;
729 case nir_var_mem_global:
730 assert(addr_format_is_global(addr_format));
731 op = nir_intrinsic_store_global;
732 break;
733 default:
734 unreachable("Unsupported explicit IO variable mode");
735 }
736
737 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
738
739 store->src[0] = nir_src_for_ssa(value);
740 if (addr_format_is_global(addr_format)) {
741 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
742 } else {
743 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
744 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
745 }
746
747 nir_intrinsic_set_write_mask(store, write_mask);
748
749 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
750
751 /* TODO: We should try and provide a better alignment. For OpenCL, we need
752 * to plumb the alignment through from SPIR-V when we have one.
753 */
754 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
755
756 assert(value->num_components == 1 ||
757 value->num_components == intrin->num_components);
758 store->num_components = value->num_components;
759 nir_builder_instr_insert(b, &store->instr);
760 }
761
762 static nir_ssa_def *
763 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
764 nir_ssa_def *addr, nir_address_format addr_format)
765 {
766 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
767 const unsigned num_data_srcs =
768 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
769
770 nir_intrinsic_op op;
771 switch (mode) {
772 case nir_var_mem_ssbo:
773 if (addr_format_is_global(addr_format))
774 op = global_atomic_for_deref(intrin->intrinsic);
775 else
776 op = ssbo_atomic_for_deref(intrin->intrinsic);
777 break;
778 case nir_var_mem_global:
779 assert(addr_format_is_global(addr_format));
780 op = global_atomic_for_deref(intrin->intrinsic);
781 break;
782 default:
783 unreachable("Unsupported explicit IO variable mode");
784 }
785
786 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
787
788 unsigned src = 0;
789 if (addr_format_is_global(addr_format)) {
790 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
791 } else {
792 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
793 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
794 }
795 for (unsigned i = 0; i < num_data_srcs; i++) {
796 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
797 }
798
799 assert(intrin->dest.ssa.num_components == 1);
800 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
801 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
802 nir_builder_instr_insert(b, &atomic->instr);
803
804 return &atomic->dest.ssa;
805 }
806
807 static void
808 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
809 nir_address_format addr_format)
810 {
811 /* Just delete the deref if it's not used. We can't use
812 * nir_deref_instr_remove_if_unused here because it may remove more than
813 * one deref, which could break our list walking since we walk the list
814 * backwards.
815 */
816 assert(list_empty(&deref->dest.ssa.if_uses));
817 if (list_empty(&deref->dest.ssa.uses)) {
818 nir_instr_remove(&deref->instr);
819 return;
820 }
821
822 b->cursor = nir_after_instr(&deref->instr);
823
824 /* Var derefs must be lowered away by the driver */
825 assert(deref->deref_type != nir_deref_type_var);
826
827 assert(deref->parent.is_ssa);
828 nir_ssa_def *parent_addr = deref->parent.ssa;
829
830 nir_ssa_def *addr = NULL;
831 assert(deref->dest.is_ssa);
832 switch (deref->deref_type) {
833 case nir_deref_type_var:
834 unreachable("Must be lowered by the driver");
835 break;
836
837 case nir_deref_type_array: {
838 nir_deref_instr *parent = nir_deref_instr_parent(deref);
839
840 unsigned stride = glsl_get_explicit_stride(parent->type);
841 if ((glsl_type_is_matrix(parent->type) &&
842 glsl_matrix_type_is_row_major(parent->type)) ||
843 (glsl_type_is_vector(parent->type) && stride == 0))
844 stride = type_scalar_size_bytes(parent->type);
845
846 assert(stride > 0);
847
848 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
849 index = nir_i2i(b, index, parent_addr->bit_size);
850 addr = build_addr_iadd(b, parent_addr, addr_format,
851 nir_imul_imm(b, index, stride));
852 break;
853 }
854
855 case nir_deref_type_ptr_as_array: {
856 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
857 index = nir_i2i(b, index, parent_addr->bit_size);
858 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
859 addr = build_addr_iadd(b, parent_addr, addr_format,
860 nir_imul_imm(b, index, stride));
861 break;
862 }
863
864 case nir_deref_type_array_wildcard:
865 unreachable("Wildcards should be lowered by now");
866 break;
867
868 case nir_deref_type_struct: {
869 nir_deref_instr *parent = nir_deref_instr_parent(deref);
870 int offset = glsl_get_struct_field_offset(parent->type,
871 deref->strct.index);
872 assert(offset >= 0);
873 addr = build_addr_iadd_imm(b, parent_addr, addr_format, offset);
874 break;
875 }
876
877 case nir_deref_type_cast:
878 /* Nothing to do here */
879 addr = parent_addr;
880 break;
881 }
882
883 nir_instr_remove(&deref->instr);
884 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
885 }
886
887 static void
888 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
889 nir_address_format addr_format)
890 {
891 b->cursor = nir_after_instr(&intrin->instr);
892
893 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
894 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
895 unsigned scalar_size = type_scalar_size_bytes(deref->type);
896 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
897 assert(vec_stride == 0 || vec_stride >= scalar_size);
898
899 nir_ssa_def *addr = &deref->dest.ssa;
900 if (intrin->intrinsic == nir_intrinsic_load_deref) {
901 nir_ssa_def *value;
902 if (vec_stride > scalar_size) {
903 nir_ssa_def *comps[4] = { NULL, };
904 for (unsigned i = 0; i < intrin->num_components; i++) {
905 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
906 vec_stride * i);
907 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
908 addr_format, 1);
909 }
910 value = nir_vec(b, comps, intrin->num_components);
911 } else {
912 value = build_explicit_io_load(b, intrin, addr, addr_format,
913 intrin->num_components);
914 }
915 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
916 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
917 assert(intrin->src[1].is_ssa);
918 nir_ssa_def *value = intrin->src[1].ssa;
919 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
920 if (vec_stride > scalar_size) {
921 for (unsigned i = 0; i < intrin->num_components; i++) {
922 if (!(write_mask & (1 << i)))
923 continue;
924
925 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
926 vec_stride * i);
927 build_explicit_io_store(b, intrin, comp_addr, addr_format,
928 nir_channel(b, value, i), 1);
929 }
930 } else {
931 build_explicit_io_store(b, intrin, addr, addr_format,
932 value, write_mask);
933 }
934 } else {
935 nir_ssa_def *value =
936 build_explicit_io_atomic(b, intrin, addr, addr_format);
937 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
938 }
939
940 nir_instr_remove(&intrin->instr);
941 }
942
943 static bool
944 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
945 nir_address_format addr_format)
946 {
947 bool progress = false;
948
949 nir_builder b;
950 nir_builder_init(&b, impl);
951
952 /* Walk in reverse order so that we can see the full deref chain when we
953 * lower the access operations. We lower them assuming that the derefs
954 * will be turned into address calculations later.
955 */
956 nir_foreach_block_reverse(block, impl) {
957 nir_foreach_instr_reverse_safe(instr, block) {
958 switch (instr->type) {
959 case nir_instr_type_deref: {
960 nir_deref_instr *deref = nir_instr_as_deref(instr);
961 if (deref->mode & modes) {
962 lower_explicit_io_deref(&b, deref, addr_format);
963 progress = true;
964 }
965 break;
966 }
967
968 case nir_instr_type_intrinsic: {
969 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
970 switch (intrin->intrinsic) {
971 case nir_intrinsic_load_deref:
972 case nir_intrinsic_store_deref:
973 case nir_intrinsic_deref_atomic_add:
974 case nir_intrinsic_deref_atomic_imin:
975 case nir_intrinsic_deref_atomic_umin:
976 case nir_intrinsic_deref_atomic_imax:
977 case nir_intrinsic_deref_atomic_umax:
978 case nir_intrinsic_deref_atomic_and:
979 case nir_intrinsic_deref_atomic_or:
980 case nir_intrinsic_deref_atomic_xor:
981 case nir_intrinsic_deref_atomic_exchange:
982 case nir_intrinsic_deref_atomic_comp_swap:
983 case nir_intrinsic_deref_atomic_fadd:
984 case nir_intrinsic_deref_atomic_fmin:
985 case nir_intrinsic_deref_atomic_fmax:
986 case nir_intrinsic_deref_atomic_fcomp_swap: {
987 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
988 if (deref->mode & modes) {
989 lower_explicit_io_access(&b, intrin, addr_format);
990 progress = true;
991 }
992 break;
993 }
994
995 default:
996 break;
997 }
998 break;
999 }
1000
1001 default:
1002 /* Nothing to do */
1003 break;
1004 }
1005 }
1006 }
1007
1008 if (progress) {
1009 nir_metadata_preserve(impl, nir_metadata_block_index |
1010 nir_metadata_dominance);
1011 }
1012
1013 return progress;
1014 }
1015
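/*
 * A minimal usage sketch (illustrative only; the chosen modes and address
 * format depend on the driver, and "s" is an assumed nir_shader pointer):
 *
 *    NIR_PASS_V(s, nir_lower_explicit_io,
 *               nir_var_mem_ubo | nir_var_mem_ssbo,
 *               nir_address_format_32bit_index_offset);
 */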
1016 bool
1017 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1018 nir_address_format addr_format)
1019 {
1020 bool progress = false;
1021
1022 nir_foreach_function(function, shader) {
1023 if (function->impl &&
1024 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1025 progress = true;
1026 }
1027
1028 return progress;
1029 }
1030
1031 /**
1032 * Return the offset source for a load/store intrinsic.
1033 */
1034 nir_src *
1035 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1036 {
1037 switch (instr->intrinsic) {
1038 case nir_intrinsic_load_input:
1039 case nir_intrinsic_load_output:
1040 case nir_intrinsic_load_shared:
1041 case nir_intrinsic_load_uniform:
1042 case nir_intrinsic_load_global:
1043 return &instr->src[0];
1044 case nir_intrinsic_load_ubo:
1045 case nir_intrinsic_load_ssbo:
1046 case nir_intrinsic_load_per_vertex_input:
1047 case nir_intrinsic_load_per_vertex_output:
1048 case nir_intrinsic_load_interpolated_input:
1049 case nir_intrinsic_store_output:
1050 case nir_intrinsic_store_shared:
1051 case nir_intrinsic_store_global:
1052 return &instr->src[1];
1053 case nir_intrinsic_store_ssbo:
1054 case nir_intrinsic_store_per_vertex_output:
1055 return &instr->src[2];
1056 default:
1057 return NULL;
1058 }
1059 }
1060
1061 /**
1062 * Return the vertex index source for a load/store per_vertex intrinsic.
1063 */
1064 nir_src *
1065 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1066 {
1067 switch (instr->intrinsic) {
1068 case nir_intrinsic_load_per_vertex_input:
1069 case nir_intrinsic_load_per_vertex_output:
1070 return &instr->src[0];
1071 case nir_intrinsic_store_per_vertex_output:
1072 return &instr->src[1];
1073 default:
1074 return NULL;
1075 }
1076 }