nir: rename nir_var_ubo to nir_var_mem_ubo
[mesa.git] src/compiler/nir/nir_lower_io.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into actual input/output intrinsics.
 */
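
/* Illustrative sketch (not literal NIR assembly): a load_deref of a
 * shader_in variable becomes a load_input (or load_per_vertex_input /
 * load_interpolated_input) intrinsic whose "base" index comes from
 * var->data.driver_location and whose offset/component sources are computed
 * from the deref chain by get_io_offset() below.  Stores and deref atomics
 * are handled analogously.
 */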

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_ssbo)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}
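
/* Usage sketch (illustrative; the variable list and the type_size_vec4()
 * callback are stand-ins for whatever the driver actually uses):
 *
 *    nir_assign_var_locations(&shader->inputs, &shader->num_inputs,
 *                             type_size_vec4);
 *
 * Each variable gets a driver_location, and *size receives the total number
 * of units consumed, as counted by the callback.
 */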

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as geometry shader inputs).
 */
bool
nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type);

         nir_ssa_def *mul =
            nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i));
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
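
/* Worked example (illustrative types and sizes): for a deref chain
 * equivalent to "s[i].f", where type_size() reports the struct element as 2
 * slots and the field "f" starts 1 slot in, the loop above builds
 *
 *    offset = iadd(imul_imm(i, 2), 1)
 *
 * and constant folding collapses this whenever i is a constant.
 */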

static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}
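
/* For reference, the operand layout produced above (a summary of the
 * assignments, not a full description of each intrinsic):
 *
 *    load_input(offset)                       base, component
 *    load_per_vertex_input(vertex, offset)    base, component
 *    load_interpolated_input(bary, offset)    base, component
 *    load_output(offset)                      base, component
 *    load_per_vertex_output(vertex, offset)   base, component
 *    load_uniform(offset)                     base, range
 *    load_shared(offset)                      base
 */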

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[1], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}
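
/* The resulting operand order follows the assignments above: src[0] is the
 * value being written, an optional vertex index comes next for per-vertex
 * outputs, and the offset fills the remaining source.  The write mask is
 * copied from the original store_deref unchanged.
 */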

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_variable *var, nir_ssa_def *offset)
{
   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
          nir_intrinsic_infos[op].num_srcs);
   for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component)
{
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, var, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, vertex_index, var, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_deref:
         replacement = lower_store(intrin, state, vertex_index, var, offset,
                                   component_offset);
         break;

      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, var, offset);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}
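
/* Typical invocation from a driver (a sketch; type_size_vec4() is a
 * hypothetical callback standing in for the driver's slot-counting
 * function):
 *
 *    NIR_PASS_V(nir, nir_lower_io,
 *               nir_var_shader_in | nir_var_shader_out,
 *               type_size_vec4, (nir_lower_io_options)0);
 *
 * Afterwards, I/O in the selected modes is expressed as base/offset
 * intrinsics rather than variable derefs.
 */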

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
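
/* The explicit-I/O helpers below currently support only
 * nir_address_format_vk_index_offset: an address is a two-component vector
 * holding a buffer index and a byte offset.  Adding to an address therefore
 * only touches the second component; for example, stepping over a 16-byte
 * (std140 vec4) struct member would be
 * build_addr_iadd_imm(b, addr, addr_format, 16).
 */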

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format, nir_ssa_def *offset)
{
   assert(offset->num_components == 1);
   assert(addr->bit_size == offset->bit_size);

   switch (addr_format) {
   case nir_address_format_vk_index_offset:
      assert(addr->num_components == 2);
      return nir_vec2(b, nir_channel(b, addr, 0),
                         nir_iadd(b, nir_channel(b, addr, 1), offset));
   }
   unreachable("Invalid address format");
}

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format, int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format,
                          nir_imm_intN_t(b, offset, addr->bit_size));
}

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_vk_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 0);
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   assert(addr_format == nir_address_format_vk_index_offset);
   assert(addr->num_components == 2);
   return nir_channel(b, addr, 1);
}

static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       unsigned num_components)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ubo:
      op = nir_intrinsic_load_ubo;
      break;
   case nir_var_ssbo:
      op = nir_intrinsic_load_ssbo;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
   load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));

   if (mode != nir_var_mem_ubo)
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   /* TODO: We should try to provide a better alignment.  For OpenCL, we need
    * to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(load, intrin->dest.ssa.bit_size / 8, 0);

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_ssbo:
      op = nir_intrinsic_store_ssbo;
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   store->src[0] = nir_src_for_ssa(value);
   store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
   store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));

   nir_intrinsic_set_write_mask(store, write_mask);

   nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   /* TODO: We should try to provide a better alignment.  For OpenCL, we need
    * to plumb the alignment through from SPIR-V when we have one.
    */
   nir_intrinsic_set_align(store, value->bit_size / 8, 0);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;
   nir_builder_instr_insert(b, &store->instr);
}

static nir_ssa_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_ssa_def *addr, nir_address_format addr_format)
{
   nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_ssbo:
      switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_ssbo_##O; break;
      OP(atomic_exchange)
      OP(atomic_comp_swap)
      OP(atomic_add)
      OP(atomic_imin)
      OP(atomic_umin)
      OP(atomic_imax)
      OP(atomic_umax)
      OP(atomic_and)
      OP(atomic_or)
      OP(atomic_xor)
      OP(atomic_fadd)
      OP(atomic_fmin)
      OP(atomic_fmax)
      OP(atomic_fcomp_swap)
#undef OP
      default:
         unreachable("Invalid SSBO atomic");
      }
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);

   atomic->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
   atomic->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   for (unsigned i = 0; i < num_data_srcs; i++) {
      assert(intrin->src[1 + i].is_ssa);
      atomic->src[2 + i] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   assert(intrin->dest.ssa.num_components == 1);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                     1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
   nir_builder_instr_insert(b, &atomic->instr);

   return &atomic->dest.ssa;
}

static void
lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
                        nir_address_format addr_format)
{
   /* Just delete the deref if it's not used.  We can't use
    * nir_deref_instr_remove_if_unused here because it may remove more than
    * one deref, which could break our list walking since we walk the list
    * backwards.
    */
   assert(list_empty(&deref->dest.ssa.if_uses));
   if (list_empty(&deref->dest.ssa.uses)) {
      nir_instr_remove(&deref->instr);
      return;
   }

   b->cursor = nir_after_instr(&deref->instr);

   /* Var derefs must be lowered away by the driver */
   assert(deref->deref_type != nir_deref_type_var);

   assert(deref->parent.is_ssa);
   nir_ssa_def *parent_addr = deref->parent.ssa;

   nir_ssa_def *addr = NULL;
   assert(deref->dest.is_ssa);
   switch (deref->deref_type) {
   case nir_deref_type_var:
      unreachable("Must be lowered by the driver");
      break;

   case nir_deref_type_array: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      unsigned stride = glsl_get_explicit_stride(parent->type);
      if ((glsl_type_is_matrix(parent->type) &&
           glsl_matrix_type_is_row_major(parent->type)) ||
          (glsl_type_is_vector(parent->type) && stride == 0))
         stride = type_scalar_size_bytes(parent->type);

      assert(stride > 0);

      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, parent_addr->bit_size);
      addr = build_addr_iadd(b, parent_addr, addr_format,
                             nir_imul_imm(b, index, stride));
      break;
   }

   case nir_deref_type_ptr_as_array: {
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      index = nir_i2i(b, index, parent_addr->bit_size);
      unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
      addr = build_addr_iadd(b, parent_addr, addr_format,
                             nir_imul_imm(b, index, stride));
      break;
   }

   case nir_deref_type_array_wildcard:
      unreachable("Wildcards should be lowered by now");
      break;

   case nir_deref_type_struct: {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      int offset = glsl_get_struct_field_offset(parent->type,
                                                deref->strct.index);
      assert(offset >= 0);
      addr = build_addr_iadd_imm(b, parent_addr, addr_format, offset);
      break;
   }

   case nir_deref_type_cast:
      /* Nothing to do here */
      addr = parent_addr;
      break;
   }

   nir_instr_remove(&deref->instr);
   nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
}
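
/* Worked example (illustrative layout): for an SSBO access equivalent to
 * "buf.data[i]", where "data" starts at byte 16 and has an explicit array
 * stride of 4, the walk above turns the deref chain into
 *
 *    addr1 = (index, offset + 16)            struct member
 *    addr2 = (index, offset + 16 + i * 4)    array element
 *
 * using build_addr_iadd_imm() and build_addr_iadd() respectively.
 */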

static void
lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
   unsigned scalar_size = type_scalar_size_bytes(deref->type);
   assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
   assert(vec_stride == 0 || vec_stride >= scalar_size);

   nir_ssa_def *addr = &deref->dest.ssa;
   if (intrin->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *value;
      if (vec_stride > scalar_size) {
         nir_ssa_def *comps[4] = { NULL, };
         for (unsigned i = 0; i < intrin->num_components; i++) {
            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
                                              addr_format, 1);
         }
         value = nir_vec(b, comps, intrin->num_components);
      } else {
         value = build_explicit_io_load(b, intrin, addr, addr_format,
                                        intrin->num_components);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      if (vec_stride > scalar_size) {
         for (unsigned i = 0; i < intrin->num_components; i++) {
            if (!(write_mask & (1 << i)))
               continue;

            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         vec_stride * i);
            build_explicit_io_store(b, intrin, comp_addr, addr_format,
                                    nir_channel(b, value, i), 1);
         }
      } else {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 value, write_mask);
      }
   } else {
      nir_ssa_def *value =
         build_explicit_io_atomic(b, intrin, addr, addr_format);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
   }

   nir_instr_remove(&intrin->instr);
}

static bool
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
                           nir_address_format addr_format)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Walk in reverse order so that we can see the full deref chain when we
    * lower the access operations.  We lower them assuming that the derefs
    * will be turned into address calculations later.
    */
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes) {
               lower_explicit_io_deref(&b, deref, addr_format);
               progress = true;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
            case nir_intrinsic_store_deref:
            case nir_intrinsic_deref_atomic_add:
            case nir_intrinsic_deref_atomic_imin:
            case nir_intrinsic_deref_atomic_umin:
            case nir_intrinsic_deref_atomic_imax:
            case nir_intrinsic_deref_atomic_umax:
            case nir_intrinsic_deref_atomic_and:
            case nir_intrinsic_deref_atomic_or:
            case nir_intrinsic_deref_atomic_xor:
            case nir_intrinsic_deref_atomic_exchange:
            case nir_intrinsic_deref_atomic_comp_swap:
            case nir_intrinsic_deref_atomic_fadd:
            case nir_intrinsic_deref_atomic_fmin:
            case nir_intrinsic_deref_atomic_fmax:
            case nir_intrinsic_deref_atomic_fcomp_swap: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (deref->mode & modes) {
                  lower_explicit_io_access(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

bool
nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
                      nir_address_format addr_format)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
         progress = true;
   }

   return progress;
}
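
/* A minimal sketch of how a Vulkan driver might run this pass (the exact
 * pass ordering and mode set are the driver's choice):
 *
 *    NIR_PASS_V(nir, nir_lower_explicit_io,
 *               nir_var_mem_ubo | nir_var_ssbo,
 *               nir_address_format_vk_index_offset);
 *
 * Derefs in the selected modes become (index, offset) address math, and the
 * corresponding load/store/atomic derefs become load_ubo, load_ssbo,
 * store_ssbo, and ssbo_atomic_* intrinsics.
 */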

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}