util: rename list_empty() to list_is_empty()
[mesa.git] / src / compiler / nir / nir_lower_io.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts loads and stores of input/output variables
31 * into the corresponding input/output intrinsics.
32 */
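/*
 * For example, a fragment-shader load of a flat input variable
 *
 *    load_deref(&in_var)
 *
 * roughly becomes (illustrative, not exact NIR syntax)
 *
 *    load_input(offset) (base=driver_location, component=location_frac)
 *
 * UBO/SSBO/global memory access is handled separately by
 * nir_lower_explicit_io() further down in this file.
 */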
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 #include "util/u_math.h"
39
40 struct lower_io_state {
41 void *dead_ctx;
42 nir_builder builder;
43 int (*type_size)(const struct glsl_type *type, bool);
44 nir_variable_mode modes;
45 nir_lower_io_options options;
46 };
47
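/* The three helpers below map a deref-based atomic intrinsic to the
 * corresponding SSBO, global, or shared atomic intrinsic.
 */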
48 static nir_intrinsic_op
49 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
50 {
51 switch (deref_op) {
52 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
53 OP(atomic_exchange)
54 OP(atomic_comp_swap)
55 OP(atomic_add)
56 OP(atomic_imin)
57 OP(atomic_umin)
58 OP(atomic_imax)
59 OP(atomic_umax)
60 OP(atomic_and)
61 OP(atomic_or)
62 OP(atomic_xor)
63 OP(atomic_fadd)
64 OP(atomic_fmin)
65 OP(atomic_fmax)
66 OP(atomic_fcomp_swap)
67 #undef OP
68 default:
69 unreachable("Invalid SSBO atomic");
70 }
71 }
72
73 static nir_intrinsic_op
74 global_atomic_for_deref(nir_intrinsic_op deref_op)
75 {
76 switch (deref_op) {
77 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
78 OP(atomic_exchange)
79 OP(atomic_comp_swap)
80 OP(atomic_add)
81 OP(atomic_imin)
82 OP(atomic_umin)
83 OP(atomic_imax)
84 OP(atomic_umax)
85 OP(atomic_and)
86 OP(atomic_or)
87 OP(atomic_xor)
88 OP(atomic_fadd)
89 OP(atomic_fmin)
90 OP(atomic_fmax)
91 OP(atomic_fcomp_swap)
92 #undef OP
93 default:
94 unreachable("Invalid SSBO atomic");
95 }
96 }
97
98 static nir_intrinsic_op
99 shared_atomic_for_deref(nir_intrinsic_op deref_op)
100 {
101 switch (deref_op) {
102 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
103 OP(atomic_exchange)
104 OP(atomic_comp_swap)
105 OP(atomic_add)
106 OP(atomic_imin)
107 OP(atomic_umin)
108 OP(atomic_imax)
109 OP(atomic_umax)
110 OP(atomic_and)
111 OP(atomic_or)
112 OP(atomic_xor)
113 OP(atomic_fadd)
114 OP(atomic_fmin)
115 OP(atomic_fmax)
116 OP(atomic_fcomp_swap)
117 #undef OP
118 default:
119 unreachable("Invalid shared atomic");
120 }
121 }
122
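/* Assign consecutive driver_location values to every variable in var_list
 * (skipping UBO/SSBO-backed variables), using the given type_size callback
 * to measure each one, and report the total in *size.
 */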
123 void
124 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
125 int (*type_size)(const struct glsl_type *, bool))
126 {
127 unsigned location = 0;
128
129 nir_foreach_variable(var, var_list) {
130 /*
131 * UBOs and SSBOs have their own address spaces, so don't count them
132 * towards the number of global uniforms.
133 */
134 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
135 continue;
136
137 var->data.driver_location = location;
138 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
139 var->data.mode == nir_var_shader_out ||
140 var->data.bindless;
141 location += type_size(var->type, bindless_type_size);
142 }
143
144 *size = location;
145 }
146
147 /**
148 * Return true if the given variable is a per-vertex input/output array
149 * (such as a geometry shader input).
150 */
151 bool
152 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
153 {
154 if (var->data.patch || !glsl_type_is_array(var->type))
155 return false;
156
157 if (var->data.mode == nir_var_shader_in)
158 return stage == MESA_SHADER_GEOMETRY ||
159 stage == MESA_SHADER_TESS_CTRL ||
160 stage == MESA_SHADER_TESS_EVAL;
161
162 if (var->data.mode == nir_var_shader_out)
163 return stage == MESA_SHADER_TESS_CTRL;
164
165 return false;
166 }
167
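/* Walk the deref chain and build an offset expression in the units defined
 * by the type_size callback.  For per-vertex I/O the outermost array index
 * is returned separately through vertex_index, and for "compact" variables
 * the constant index is folded into *component.
 */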
168 static nir_ssa_def *
169 get_io_offset(nir_builder *b, nir_deref_instr *deref,
170 nir_ssa_def **vertex_index,
171 int (*type_size)(const struct glsl_type *, bool),
172 unsigned *component, bool bts)
173 {
174 nir_deref_path path;
175 nir_deref_path_init(&path, deref, NULL);
176
177 assert(path.path[0]->deref_type == nir_deref_type_var);
178 nir_deref_instr **p = &path.path[1];
179
180 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
181 * outermost array index separate. Process the rest normally.
182 */
183 if (vertex_index != NULL) {
184 assert((*p)->deref_type == nir_deref_type_array);
185 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
186 p++;
187 }
188
189 if (path.path[0]->var->data.compact) {
190 assert((*p)->deref_type == nir_deref_type_array);
191 assert(glsl_type_is_scalar((*p)->type));
192
193 /* We always lower indirect dereferences for "compact" array vars. */
194 const unsigned index = nir_src_as_uint((*p)->arr.index);
195 const unsigned total_offset = *component + index;
196 const unsigned slot_offset = total_offset / 4;
197 *component = total_offset % 4;
198 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
199 }
200
201 /* Just emit code and let constant-folding go to town */
202 nir_ssa_def *offset = nir_imm_int(b, 0);
203
204 for (; *p; p++) {
205 if ((*p)->deref_type == nir_deref_type_array) {
206 unsigned size = type_size((*p)->type, bts);
207
208 nir_ssa_def *mul =
209 nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
210
211 offset = nir_iadd(b, offset, mul);
212 } else if ((*p)->deref_type == nir_deref_type_struct) {
213 /* p starts at path[1], so this is safe */
214 nir_deref_instr *parent = *(p - 1);
215
216 unsigned field_offset = 0;
217 for (unsigned i = 0; i < (*p)->strct.index; i++) {
218 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
219 }
220 offset = nir_iadd_imm(b, offset, field_offset);
221 } else {
222 unreachable("Unsupported deref type");
223 }
224 }
225
226 nir_deref_path_finish(&path);
227
228 return offset;
229 }
230
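/* Emit the load intrinsic matching the variable's mode: load_input,
 * load_per_vertex_input or load_interpolated_input for inputs,
 * load_output or load_per_vertex_output for outputs, load_uniform for
 * uniforms, and load_shared for shared memory.
 */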
231 static nir_ssa_def *
232 emit_load(struct lower_io_state *state,
233 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
234 unsigned component, unsigned num_components, unsigned bit_size,
235 nir_alu_type type)
236 {
237 nir_builder *b = &state->builder;
238 const nir_shader *nir = b->shader;
239 nir_variable_mode mode = var->data.mode;
240 nir_ssa_def *barycentric = NULL;
241
242 nir_intrinsic_op op;
243 switch (mode) {
244 case nir_var_shader_in:
245 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
246 nir->options->use_interpolated_input_intrinsics &&
247 var->data.interpolation != INTERP_MODE_FLAT) {
248 assert(vertex_index == NULL);
249
250 nir_intrinsic_op bary_op;
251 if (var->data.sample ||
252 (state->options & nir_lower_io_force_sample_interpolation))
253 bary_op = nir_intrinsic_load_barycentric_sample;
254 else if (var->data.centroid)
255 bary_op = nir_intrinsic_load_barycentric_centroid;
256 else
257 bary_op = nir_intrinsic_load_barycentric_pixel;
258
259 barycentric = nir_load_barycentric(&state->builder, bary_op,
260 var->data.interpolation);
261 op = nir_intrinsic_load_interpolated_input;
262 } else {
263 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
264 nir_intrinsic_load_input;
265 }
266 break;
267 case nir_var_shader_out:
268 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
269 nir_intrinsic_load_output;
270 break;
271 case nir_var_uniform:
272 op = nir_intrinsic_load_uniform;
273 break;
274 case nir_var_mem_shared:
275 op = nir_intrinsic_load_shared;
276 break;
277 default:
278 unreachable("Unknown variable mode");
279 }
280
281 nir_intrinsic_instr *load =
282 nir_intrinsic_instr_create(state->builder.shader, op);
283 load->num_components = num_components;
284
285 nir_intrinsic_set_base(load, var->data.driver_location);
286 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
287 nir_intrinsic_set_component(load, component);
288
289 if (load->intrinsic == nir_intrinsic_load_uniform)
290 nir_intrinsic_set_range(load,
291 state->type_size(var->type, var->data.bindless));
292
293 if (load->intrinsic == nir_intrinsic_load_input ||
294 load->intrinsic == nir_intrinsic_load_uniform)
295 nir_intrinsic_set_type(load, type);
296
297 if (vertex_index) {
298 load->src[0] = nir_src_for_ssa(vertex_index);
299 load->src[1] = nir_src_for_ssa(offset);
300 } else if (barycentric) {
301 load->src[0] = nir_src_for_ssa(barycentric);
302 load->src[1] = nir_src_for_ssa(offset);
303 } else {
304 load->src[0] = nir_src_for_ssa(offset);
305 }
306
307 nir_ssa_dest_init(&load->instr, &load->dest,
308 num_components, bit_size, NULL);
309 nir_builder_instr_insert(b, &load->instr);
310
311 return &load->dest.ssa;
312 }
313
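/* When nir_lower_io_lower_64bit_to_32 is set, 64-bit loads are split into
 * pairs of 32-bit components, at most one vec4 slot at a time, and packed
 * back together with nir_pack_64_2x32; lower_store() below performs the
 * mirror-image split for stores.
 */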
314 static nir_ssa_def *
315 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
316 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
317 unsigned component, const struct glsl_type *type)
318 {
319 assert(intrin->dest.is_ssa);
320 if (intrin->dest.ssa.bit_size == 64 &&
321 (state->options & nir_lower_io_lower_64bit_to_32)) {
322 nir_builder *b = &state->builder;
323
324 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
325
326 nir_ssa_def *comp64[4];
327 assert(component == 0 || component == 2);
328 unsigned dest_comp = 0;
329 while (dest_comp < intrin->dest.ssa.num_components) {
330 const unsigned num_comps =
331 MIN2(intrin->dest.ssa.num_components - dest_comp,
332 (4 - component) / 2);
333
334 nir_ssa_def *data32 =
335 emit_load(state, vertex_index, var, offset, component,
336 num_comps * 2, 32, nir_type_uint32);
337 for (unsigned i = 0; i < num_comps; i++) {
338 comp64[dest_comp + i] =
339 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
340 }
341
342 /* Only the first load has a component offset */
343 component = 0;
344 dest_comp += num_comps;
345 offset = nir_iadd_imm(b, offset, slot_size);
346 }
347
348 return nir_vec(b, comp64, intrin->dest.ssa.num_components);
349 } else {
350 return emit_load(state, vertex_index, var, offset, component,
351 intrin->dest.ssa.num_components,
352 intrin->dest.ssa.bit_size,
353 nir_get_nir_type_for_glsl_type(type));
354 }
355 }
356
357 static void
358 emit_store(struct lower_io_state *state, nir_ssa_def *data,
359 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
360 unsigned component, unsigned num_components,
361 nir_component_mask_t write_mask, nir_alu_type type)
362 {
363 nir_builder *b = &state->builder;
364 nir_variable_mode mode = var->data.mode;
365
366 nir_intrinsic_op op;
367 if (mode == nir_var_mem_shared) {
368 op = nir_intrinsic_store_shared;
369 } else {
370 assert(mode == nir_var_shader_out);
371 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
372 nir_intrinsic_store_output;
373 }
374
375 nir_intrinsic_instr *store =
376 nir_intrinsic_instr_create(state->builder.shader, op);
377 store->num_components = num_components;
378
379 store->src[0] = nir_src_for_ssa(data);
380
381 nir_intrinsic_set_base(store, var->data.driver_location);
382
383 if (mode == nir_var_shader_out)
384 nir_intrinsic_set_component(store, component);
385
386 if (store->intrinsic == nir_intrinsic_store_output)
387 nir_intrinsic_set_type(store, type);
388
389 nir_intrinsic_set_write_mask(store, write_mask);
390
391 if (vertex_index)
392 store->src[1] = nir_src_for_ssa(vertex_index);
393
394 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
395
396 nir_builder_instr_insert(b, &store->instr);
397 }
398
399 static void
400 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
401 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
402 unsigned component, const struct glsl_type *type)
403 {
404 assert(intrin->src[1].is_ssa);
405 if (intrin->src[1].ssa->bit_size == 64 &&
406 (state->options & nir_lower_io_lower_64bit_to_32)) {
407 nir_builder *b = &state->builder;
408
409 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
410
411 assert(component == 0 || component == 2);
412 unsigned src_comp = 0;
413 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
414 while (src_comp < intrin->num_components) {
415 const unsigned num_comps =
416 MIN2(intrin->num_components - src_comp,
417 (4 - component) / 2);
418
419 if (write_mask & BITFIELD_MASK(num_comps)) {
420 nir_ssa_def *data =
421 nir_channels(b, intrin->src[1].ssa,
422 BITFIELD_RANGE(src_comp, num_comps));
423 nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
424
425 nir_component_mask_t write_mask32 = 0;
426 for (unsigned i = 0; i < num_comps; i++) {
427 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
428 write_mask32 |= 3 << (i * 2);
429 }
430
431 emit_store(state, data32, vertex_index, var, offset,
432 component, data32->num_components, write_mask32,
433 nir_type_uint32);
434 }
435
436 /* Only the first store has a component offset */
437 component = 0;
438 src_comp += num_comps;
439 write_mask >>= num_comps;
440 offset = nir_iadd_imm(b, offset, slot_size);
441 }
442 } else {
443 emit_store(state, intrin->src[1].ssa, vertex_index, var, offset,
444 component, intrin->num_components,
445 nir_intrinsic_write_mask(intrin),
446 nir_get_nir_type_for_glsl_type(type));
447 }
448 }
449
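/* Shared-memory atomics on derefs become the corresponding shared_atomic_*
 * intrinsics, with the base taken from the variable's driver_location and
 * the computed offset passed as the first source.
 */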
450 static nir_ssa_def *
451 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
452 nir_variable *var, nir_ssa_def *offset)
453 {
454 nir_builder *b = &state->builder;
455 assert(var->data.mode == nir_var_mem_shared);
456
457 nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
458
459 nir_intrinsic_instr *atomic =
460 nir_intrinsic_instr_create(state->builder.shader, op);
461
462 nir_intrinsic_set_base(atomic, var->data.driver_location);
463
464 atomic->src[0] = nir_src_for_ssa(offset);
465 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
466 nir_intrinsic_infos[op].num_srcs);
467 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
468 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
469 }
470
471 if (nir_intrinsic_infos[op].has_dest) {
472 assert(intrin->dest.is_ssa);
473 assert(nir_intrinsic_infos[intrin->intrinsic].has_dest);
474 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
475 intrin->dest.ssa.num_components,
476 intrin->dest.ssa.bit_size, NULL);
477 }
478
479 nir_builder_instr_insert(b, &atomic->instr);
480
481 return nir_intrinsic_infos[op].has_dest ? &atomic->dest.ssa : NULL;
482 }
483
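/* interpolateAt*() on an input deref becomes a load_barycentric_* setup
 * intrinsic feeding a load_interpolated_input, except for flat inputs,
 * which degenerate to a plain load.
 */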
484 static nir_ssa_def *
485 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
486 nir_variable *var, nir_ssa_def *offset, unsigned component,
487 const struct glsl_type *type)
488 {
489 nir_builder *b = &state->builder;
490 assert(var->data.mode == nir_var_shader_in);
491
492 /* Ignore interpolateAt() for flat variables - flat is flat. */
493 if (var->data.interpolation == INTERP_MODE_FLAT)
494 return lower_load(intrin, state, NULL, var, offset, component, type);
495
496 /* None of the supported APIs allow interpolation on 64-bit things */
497 assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
498
499 nir_intrinsic_op bary_op;
500 switch (intrin->intrinsic) {
501 case nir_intrinsic_interp_deref_at_centroid:
502 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
503 nir_intrinsic_load_barycentric_sample :
504 nir_intrinsic_load_barycentric_centroid;
505 break;
506 case nir_intrinsic_interp_deref_at_sample:
507 bary_op = nir_intrinsic_load_barycentric_at_sample;
508 break;
509 case nir_intrinsic_interp_deref_at_offset:
510 bary_op = nir_intrinsic_load_barycentric_at_offset;
511 break;
512 default:
513 unreachable("Bogus interpolateAt() intrinsic.");
514 }
515
516 nir_intrinsic_instr *bary_setup =
517 nir_intrinsic_instr_create(state->builder.shader, bary_op);
518
519 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
520 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
521
522 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
523 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset)
524 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
525
526 nir_builder_instr_insert(b, &bary_setup->instr);
527
528 nir_intrinsic_instr *load =
529 nir_intrinsic_instr_create(state->builder.shader,
530 nir_intrinsic_load_interpolated_input);
531 load->num_components = intrin->num_components;
532
533 nir_intrinsic_set_base(load, var->data.driver_location);
534 nir_intrinsic_set_component(load, component);
535
536 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
537 load->src[1] = nir_src_for_ssa(offset);
538
539 assert(intrin->dest.is_ssa);
540 nir_ssa_dest_init(&load->instr, &load->dest,
541 intrin->dest.ssa.num_components,
542 intrin->dest.ssa.bit_size, NULL);
543 nir_builder_instr_insert(b, &load->instr);
544
545 return &load->dest.ssa;
546 }
547
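/* Lower every load/store/atomic/interpolation deref intrinsic in the block
 * whose variable mode is covered by state->modes, replacing it with the
 * offset-based intrinsic built by the helpers above.
 */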
548 static bool
549 nir_lower_io_block(nir_block *block,
550 struct lower_io_state *state)
551 {
552 nir_builder *b = &state->builder;
553 const nir_shader_compiler_options *options = b->shader->options;
554 bool progress = false;
555
556 nir_foreach_instr_safe(instr, block) {
557 if (instr->type != nir_instr_type_intrinsic)
558 continue;
559
560 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
561
562 switch (intrin->intrinsic) {
563 case nir_intrinsic_load_deref:
564 case nir_intrinsic_store_deref:
565 case nir_intrinsic_deref_atomic_add:
566 case nir_intrinsic_deref_atomic_imin:
567 case nir_intrinsic_deref_atomic_umin:
568 case nir_intrinsic_deref_atomic_imax:
569 case nir_intrinsic_deref_atomic_umax:
570 case nir_intrinsic_deref_atomic_and:
571 case nir_intrinsic_deref_atomic_or:
572 case nir_intrinsic_deref_atomic_xor:
573 case nir_intrinsic_deref_atomic_exchange:
574 case nir_intrinsic_deref_atomic_comp_swap:
575 case nir_intrinsic_deref_atomic_fadd:
576 case nir_intrinsic_deref_atomic_fmin:
577 case nir_intrinsic_deref_atomic_fmax:
578 case nir_intrinsic_deref_atomic_fcomp_swap:
579 /* We can lower the IO for this NIR intrinsic */
580 break;
581 case nir_intrinsic_interp_deref_at_centroid:
582 case nir_intrinsic_interp_deref_at_sample:
583 case nir_intrinsic_interp_deref_at_offset:
584 /* We can optionally lower these to load_interpolated_input */
585 if (options->use_interpolated_input_intrinsics)
586 break;
587 default:
588 /* We can't lower the IO for this NIR intrinsic, so skip it */
589 continue;
590 }
591
592 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
593
594 nir_variable_mode mode = deref->mode;
595
596 if ((state->modes & mode) == 0)
597 continue;
598
599 if (mode != nir_var_shader_in &&
600 mode != nir_var_shader_out &&
601 mode != nir_var_mem_shared &&
602 mode != nir_var_uniform)
603 continue;
604
605 nir_variable *var = nir_deref_instr_get_variable(deref);
606
607 b->cursor = nir_before_instr(instr);
608
609 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
610
611 nir_ssa_def *offset;
612 nir_ssa_def *vertex_index = NULL;
613 unsigned component_offset = var->data.location_frac;
614 bool bindless_type_size = mode == nir_var_shader_in ||
615 mode == nir_var_shader_out ||
616 var->data.bindless;
617
618 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
619 state->type_size, &component_offset,
620 bindless_type_size);
621
622 nir_ssa_def *replacement = NULL;
623
624 switch (intrin->intrinsic) {
625 case nir_intrinsic_load_deref:
626 replacement = lower_load(intrin, state, vertex_index, var, offset,
627 component_offset, deref->type);
628 break;
629
630 case nir_intrinsic_store_deref:
631 lower_store(intrin, state, vertex_index, var, offset,
632 component_offset, deref->type);
633 break;
634
635 case nir_intrinsic_deref_atomic_add:
636 case nir_intrinsic_deref_atomic_imin:
637 case nir_intrinsic_deref_atomic_umin:
638 case nir_intrinsic_deref_atomic_imax:
639 case nir_intrinsic_deref_atomic_umax:
640 case nir_intrinsic_deref_atomic_and:
641 case nir_intrinsic_deref_atomic_or:
642 case nir_intrinsic_deref_atomic_xor:
643 case nir_intrinsic_deref_atomic_exchange:
644 case nir_intrinsic_deref_atomic_comp_swap:
645 case nir_intrinsic_deref_atomic_fadd:
646 case nir_intrinsic_deref_atomic_fmin:
647 case nir_intrinsic_deref_atomic_fmax:
648 case nir_intrinsic_deref_atomic_fcomp_swap:
649 assert(vertex_index == NULL);
650 replacement = lower_atomic(intrin, state, var, offset);
651 break;
652
653 case nir_intrinsic_interp_deref_at_centroid:
654 case nir_intrinsic_interp_deref_at_sample:
655 case nir_intrinsic_interp_deref_at_offset:
656 assert(vertex_index == NULL);
657 replacement = lower_interpolate_at(intrin, state, var, offset,
658 component_offset, deref->type);
659 break;
660
661 default:
662 continue;
663 }
664
665 if (replacement) {
666 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
667 nir_src_for_ssa(replacement));
668 }
669 nir_instr_remove(&intrin->instr);
670 progress = true;
671 }
672
673 return progress;
674 }
675
676 static bool
677 nir_lower_io_impl(nir_function_impl *impl,
678 nir_variable_mode modes,
679 int (*type_size)(const struct glsl_type *, bool),
680 nir_lower_io_options options)
681 {
682 struct lower_io_state state;
683 bool progress = false;
684
685 nir_builder_init(&state.builder, impl);
686 state.dead_ctx = ralloc_context(NULL);
687 state.modes = modes;
688 state.type_size = type_size;
689 state.options = options;
690
691 nir_foreach_block(block, impl) {
692 progress |= nir_lower_io_block(block, &state);
693 }
694
695 ralloc_free(state.dead_ctx);
696
697 nir_metadata_preserve(impl, nir_metadata_block_index |
698 nir_metadata_dominance);
699 return progress;
700 }
701
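/* A rough sketch of how a driver might invoke this pass; the type_size
 * callback (my_type_size_vec4 here) is driver-specific and hypothetical:
 *
 *    nir_assign_var_locations(&shader->inputs, &shader->num_inputs,
 *                             my_type_size_vec4);
 *    nir_assign_var_locations(&shader->outputs, &shader->num_outputs,
 *                             my_type_size_vec4);
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 my_type_size_vec4, (nir_lower_io_options)0);
 */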
702 bool
703 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
704 int (*type_size)(const struct glsl_type *, bool),
705 nir_lower_io_options options)
706 {
707 bool progress = false;
708
709 nir_foreach_function(function, shader) {
710 if (function->impl) {
711 progress |= nir_lower_io_impl(function->impl, modes,
712 type_size, options);
713 }
714 }
715
716 return progress;
717 }
718
719 static unsigned
720 type_scalar_size_bytes(const struct glsl_type *type)
721 {
722 assert(glsl_type_is_vector_or_scalar(type) ||
723 glsl_type_is_matrix(type));
724 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
725 }
726
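/* The explicit-I/O helpers below operate on addresses whose layout depends
 * on the nir_address_format: a single scalar for the global and offset
 * formats, a (buffer index, offset) vec2 for 32bit_index_offset, and a
 * (address lo, address hi, bound, offset) vec4 for 64bit_bounded_global.
 */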
727 static nir_ssa_def *
728 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
729 nir_address_format addr_format, nir_ssa_def *offset)
730 {
731 assert(offset->num_components == 1);
732 assert(addr->bit_size == offset->bit_size);
733
734 switch (addr_format) {
735 case nir_address_format_32bit_global:
736 case nir_address_format_64bit_global:
737 case nir_address_format_32bit_offset:
738 assert(addr->num_components == 1);
739 return nir_iadd(b, addr, offset);
740
741 case nir_address_format_64bit_bounded_global:
742 assert(addr->num_components == 4);
743 return nir_vec4(b, nir_channel(b, addr, 0),
744 nir_channel(b, addr, 1),
745 nir_channel(b, addr, 2),
746 nir_iadd(b, nir_channel(b, addr, 3), offset));
747
748 case nir_address_format_32bit_index_offset:
749 assert(addr->num_components == 2);
750 return nir_vec2(b, nir_channel(b, addr, 0),
751 nir_iadd(b, nir_channel(b, addr, 1), offset));
752 case nir_address_format_logical:
753 unreachable("Unsupported address format");
754 }
755 unreachable("Invalid address format");
756 }
757
758 static nir_ssa_def *
759 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
760 nir_address_format addr_format, int64_t offset)
761 {
762 return build_addr_iadd(b, addr, addr_format,
763 nir_imm_intN_t(b, offset, addr->bit_size));
764 }
765
766 static nir_ssa_def *
767 addr_to_index(nir_builder *b, nir_ssa_def *addr,
768 nir_address_format addr_format)
769 {
770 assert(addr_format == nir_address_format_32bit_index_offset);
771 assert(addr->num_components == 2);
772 return nir_channel(b, addr, 0);
773 }
774
775 static nir_ssa_def *
776 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
777 nir_address_format addr_format)
778 {
779 assert(addr_format == nir_address_format_32bit_index_offset);
780 assert(addr->num_components == 2);
781 return nir_channel(b, addr, 1);
782 }
783
784 /** Returns true if the given address format resolves to a global address */
785 static bool
786 addr_format_is_global(nir_address_format addr_format)
787 {
788 return addr_format == nir_address_format_32bit_global ||
789 addr_format == nir_address_format_64bit_global ||
790 addr_format == nir_address_format_64bit_bounded_global;
791 }
792
793 static nir_ssa_def *
794 addr_to_global(nir_builder *b, nir_ssa_def *addr,
795 nir_address_format addr_format)
796 {
797 switch (addr_format) {
798 case nir_address_format_32bit_global:
799 case nir_address_format_64bit_global:
800 assert(addr->num_components == 1);
801 return addr;
802
803 case nir_address_format_64bit_bounded_global:
804 assert(addr->num_components == 4);
805 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
806 nir_u2u64(b, nir_channel(b, addr, 3)));
807
808 case nir_address_format_32bit_index_offset:
809 case nir_address_format_32bit_offset:
810 case nir_address_format_logical:
811 unreachable("Cannot get a 64-bit address with this address format");
812 }
813
814 unreachable("Invalid address format");
815 }
816
817 static bool
818 addr_format_needs_bounds_check(nir_address_format addr_format)
819 {
820 return addr_format == nir_address_format_64bit_bounded_global;
821 }
822
823 static nir_ssa_def *
824 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
825 nir_address_format addr_format, unsigned size)
826 {
827 assert(addr_format == nir_address_format_64bit_bounded_global);
828 assert(addr->num_components == 4);
829 return nir_ige(b, nir_channel(b, addr, 2),
830 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
831 }
832
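/* Build the load intrinsic for an explicit-address access.  For bounded
 * global addresses the load is predicated on an in-bounds check so that
 * out-of-bounds reads return zero, per the robustBufferAccess note below.
 */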
833 static nir_ssa_def *
834 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
835 nir_ssa_def *addr, nir_address_format addr_format,
836 unsigned num_components)
837 {
838 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
839
840 nir_intrinsic_op op;
841 switch (mode) {
842 case nir_var_mem_ubo:
843 op = nir_intrinsic_load_ubo;
844 break;
845 case nir_var_mem_ssbo:
846 if (addr_format_is_global(addr_format))
847 op = nir_intrinsic_load_global;
848 else
849 op = nir_intrinsic_load_ssbo;
850 break;
851 case nir_var_mem_global:
852 assert(addr_format_is_global(addr_format));
853 op = nir_intrinsic_load_global;
854 break;
855 case nir_var_shader_in:
856 assert(addr_format_is_global(addr_format));
857 op = nir_intrinsic_load_kernel_input;
858 break;
859 case nir_var_mem_shared:
860 assert(addr_format == nir_address_format_32bit_offset);
861 op = nir_intrinsic_load_shared;
862 break;
863 default:
864 unreachable("Unsupported explicit IO variable mode");
865 }
866
867 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
868
869 if (addr_format_is_global(addr_format)) {
870 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
871 } else if (addr_format == nir_address_format_32bit_offset) {
872 assert(addr->num_components == 1);
873 load->src[0] = nir_src_for_ssa(addr);
874 } else {
875 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
876 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
877 }
878
879 if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared)
880 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
881
882 unsigned bit_size = intrin->dest.ssa.bit_size;
883 if (bit_size == 1) {
884 /* TODO: Make the native bool bit_size an option. */
885 bit_size = 32;
886 }
887
888 /* TODO: We should try to provide a better alignment. For OpenCL, we need
889 * to plumb the alignment through from SPIR-V when we have one.
890 */
891 nir_intrinsic_set_align(load, bit_size / 8, 0);
892
893 assert(intrin->dest.is_ssa);
894 load->num_components = num_components;
895 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
896 bit_size, intrin->dest.ssa.name);
897
898 assert(bit_size % 8 == 0);
899
900 nir_ssa_def *result;
901 if (addr_format_needs_bounds_check(addr_format)) {
902 /* The Vulkan spec for robustBufferAccess gives us quite a few options
903 * as to what we can do with an OOB read. Unfortunately, returning
904 * undefined values isn't one of them so we return an actual zero.
905 */
906 nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
907
908 const unsigned load_size = (bit_size / 8) * load->num_components;
909 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
910
911 nir_builder_instr_insert(b, &load->instr);
912
913 nir_pop_if(b, NULL);
914
915 result = nir_if_phi(b, &load->dest.ssa, zero);
916 } else {
917 nir_builder_instr_insert(b, &load->instr);
918 result = &load->dest.ssa;
919 }
920
921 if (intrin->dest.ssa.bit_size == 1)
922 result = nir_i2b(b, result);
923
924 return result;
925 }
926
927 static void
928 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
929 nir_ssa_def *addr, nir_address_format addr_format,
930 nir_ssa_def *value, nir_component_mask_t write_mask)
931 {
932 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
933
934 nir_intrinsic_op op;
935 switch (mode) {
936 case nir_var_mem_ssbo:
937 if (addr_format_is_global(addr_format))
938 op = nir_intrinsic_store_global;
939 else
940 op = nir_intrinsic_store_ssbo;
941 break;
942 case nir_var_mem_global:
943 assert(addr_format_is_global(addr_format));
944 op = nir_intrinsic_store_global;
945 break;
946 case nir_var_mem_shared:
947 assert(addr_format == nir_address_format_32bit_offset);
948 op = nir_intrinsic_store_shared;
949 break;
950 default:
951 unreachable("Unsupported explicit IO variable mode");
952 }
953
954 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
955
956 if (value->bit_size == 1) {
957 /* TODO: Make the native bool bit_size an option. */
958 value = nir_b2i(b, value, 32);
959 }
960
961 store->src[0] = nir_src_for_ssa(value);
962 if (addr_format_is_global(addr_format)) {
963 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
964 } else if (addr_format == nir_address_format_32bit_offset) {
965 assert(addr->num_components == 1);
966 store->src[1] = nir_src_for_ssa(addr);
967 } else {
968 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
969 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
970 }
971
972 nir_intrinsic_set_write_mask(store, write_mask);
973
974 if (mode != nir_var_mem_shared)
975 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
976
977 /* TODO: We should try to provide a better alignment. For OpenCL, we need
978 * to plumb the alignment through from SPIR-V when we have one.
979 */
980 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
981
982 assert(value->num_components == 1 ||
983 value->num_components == intrin->num_components);
984 store->num_components = value->num_components;
985
986 assert(value->bit_size % 8 == 0);
987
988 if (addr_format_needs_bounds_check(addr_format)) {
989 const unsigned store_size = (value->bit_size / 8) * store->num_components;
990 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
991
992 nir_builder_instr_insert(b, &store->instr);
993
994 nir_pop_if(b, NULL);
995 } else {
996 nir_builder_instr_insert(b, &store->instr);
997 }
998 }
999
1000 static nir_ssa_def *
1001 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1002 nir_ssa_def *addr, nir_address_format addr_format)
1003 {
1004 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
1005 const unsigned num_data_srcs =
1006 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1007
1008 nir_intrinsic_op op;
1009 switch (mode) {
1010 case nir_var_mem_ssbo:
1011 if (addr_format_is_global(addr_format))
1012 op = global_atomic_for_deref(intrin->intrinsic);
1013 else
1014 op = ssbo_atomic_for_deref(intrin->intrinsic);
1015 break;
1016 case nir_var_mem_global:
1017 assert(addr_format_is_global(addr_format));
1018 op = global_atomic_for_deref(intrin->intrinsic);
1019 break;
1020 case nir_var_mem_shared:
1021 assert(addr_format == nir_address_format_32bit_offset);
1022 op = shared_atomic_for_deref(intrin->intrinsic);
1023 break;
1024 default:
1025 unreachable("Unsupported explicit IO variable mode");
1026 }
1027
1028 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1029
1030 unsigned src = 0;
1031 if (addr_format_is_global(addr_format)) {
1032 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1033 } else if (addr_format == nir_address_format_32bit_offset) {
1034 assert(addr->num_components == 1);
1035 atomic->src[src++] = nir_src_for_ssa(addr);
1036 } else {
1037 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1038 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1039 }
1040 for (unsigned i = 0; i < num_data_srcs; i++) {
1041 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1042 }
1043
1044 /* Global atomics don't have access flags because they assume that the
1045 * address may be non-uniform.
1046 */
1047 if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
1048 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1049
1050 assert(intrin->dest.ssa.num_components == 1);
1051 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1052 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
1053
1054 assert(atomic->dest.ssa.bit_size % 8 == 0);
1055
1056 if (addr_format_needs_bounds_check(addr_format)) {
1057 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1058 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1059
1060 nir_builder_instr_insert(b, &atomic->instr);
1061
1062 nir_pop_if(b, NULL);
1063 return nir_if_phi(b, &atomic->dest.ssa,
1064 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1065 } else {
1066 nir_builder_instr_insert(b, &atomic->instr);
1067 return &atomic->dest.ssa;
1068 }
1069 }
1070
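/* Compute the address of a deref in the given address format, given
 * base_addr, the address of its parent deref (NULL for a variable deref).
 */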
1071 nir_ssa_def *
1072 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1073 nir_ssa_def *base_addr,
1074 nir_address_format addr_format)
1075 {
1076 assert(deref->dest.is_ssa);
1077 switch (deref->deref_type) {
1078 case nir_deref_type_var:
1079 assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
1080 return nir_imm_intN_t(b, deref->var->data.driver_location,
1081 deref->dest.ssa.bit_size);
1082
1083 case nir_deref_type_array: {
1084 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1085
1086 unsigned stride = glsl_get_explicit_stride(parent->type);
1087 if ((glsl_type_is_matrix(parent->type) &&
1088 glsl_matrix_type_is_row_major(parent->type)) ||
1089 (glsl_type_is_vector(parent->type) && stride == 0))
1090 stride = type_scalar_size_bytes(parent->type);
1091
1092 assert(stride > 0);
1093
1094 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1095 index = nir_i2i(b, index, base_addr->bit_size);
1096 return build_addr_iadd(b, base_addr, addr_format,
1097 nir_amul_imm(b, index, stride));
1098 }
1099
1100 case nir_deref_type_ptr_as_array: {
1101 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1102 index = nir_i2i(b, index, base_addr->bit_size);
1103 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
1104 return build_addr_iadd(b, base_addr, addr_format,
1105 nir_amul_imm(b, index, stride));
1106 }
1107
1108 case nir_deref_type_array_wildcard:
1109 unreachable("Wildcards should be lowered by now");
1110 break;
1111
1112 case nir_deref_type_struct: {
1113 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1114 int offset = glsl_get_struct_field_offset(parent->type,
1115 deref->strct.index);
1116 assert(offset >= 0);
1117 return build_addr_iadd_imm(b, base_addr, addr_format, offset);
1118 }
1119
1120 case nir_deref_type_cast:
1121 /* Nothing to do here */
1122 return base_addr;
1123 }
1124
1125 unreachable("Invalid NIR deref type");
1126 }
1127
1128 void
1129 nir_lower_explicit_io_instr(nir_builder *b,
1130 nir_intrinsic_instr *intrin,
1131 nir_ssa_def *addr,
1132 nir_address_format addr_format)
1133 {
1134 b->cursor = nir_after_instr(&intrin->instr);
1135
1136 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1137 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1138 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1139 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1140 assert(vec_stride == 0 || vec_stride >= scalar_size);
1141
1142 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1143 nir_ssa_def *value;
1144 if (vec_stride > scalar_size) {
1145 nir_ssa_def *comps[4] = { NULL, };
1146 for (unsigned i = 0; i < intrin->num_components; i++) {
1147 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1148 vec_stride * i);
1149 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1150 addr_format, 1);
1151 }
1152 value = nir_vec(b, comps, intrin->num_components);
1153 } else {
1154 value = build_explicit_io_load(b, intrin, addr, addr_format,
1155 intrin->num_components);
1156 }
1157 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1158 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
1159 assert(intrin->src[1].is_ssa);
1160 nir_ssa_def *value = intrin->src[1].ssa;
1161 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1162 if (vec_stride > scalar_size) {
1163 for (unsigned i = 0; i < intrin->num_components; i++) {
1164 if (!(write_mask & (1 << i)))
1165 continue;
1166
1167 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1168 vec_stride * i);
1169 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1170 nir_channel(b, value, i), 1);
1171 }
1172 } else {
1173 build_explicit_io_store(b, intrin, addr, addr_format,
1174 value, write_mask);
1175 }
1176 } else {
1177 nir_ssa_def *value =
1178 build_explicit_io_atomic(b, intrin, addr, addr_format);
1179 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1180 }
1181
1182 nir_instr_remove(&intrin->instr);
1183 }
1184
1185 static void
1186 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
1187 nir_address_format addr_format)
1188 {
1189 /* Just delete the deref if it's not used. We can't use
1190 * nir_deref_instr_remove_if_unused here because it may remove more than
1191 * one deref which could break our list walking since we walk the list
1192 * backwards.
1193 */
1194 assert(list_is_empty(&deref->dest.ssa.if_uses));
1195 if (list_is_empty(&deref->dest.ssa.uses)) {
1196 nir_instr_remove(&deref->instr);
1197 return;
1198 }
1199
1200 b->cursor = nir_after_instr(&deref->instr);
1201
1202 nir_ssa_def *base_addr = NULL;
1203 if (deref->deref_type != nir_deref_type_var) {
1204 assert(deref->parent.is_ssa);
1205 base_addr = deref->parent.ssa;
1206 }
1207
1208 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
1209 addr_format);
1210
1211 nir_instr_remove(&deref->instr);
1212 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
1213 }
1214
1215 static void
1216 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
1217 nir_address_format addr_format)
1218 {
1219 assert(intrin->src[0].is_ssa);
1220 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
1221 }
1222
1223 static void
1224 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1225 nir_address_format addr_format)
1226 {
1227 b->cursor = nir_after_instr(&intrin->instr);
1228
1229 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1230
1231 assert(glsl_type_is_array(deref->type));
1232 assert(glsl_get_length(deref->type) == 0);
1233 unsigned stride = glsl_get_explicit_stride(deref->type);
1234 assert(stride > 0);
1235
1236 assert(addr_format == nir_address_format_32bit_index_offset);
1237 nir_ssa_def *addr = &deref->dest.ssa;
1238 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1239 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1240
1241 nir_intrinsic_instr *bsize =
1242 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
1243 bsize->src[0] = nir_src_for_ssa(index);
1244 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1245 nir_builder_instr_insert(b, &bsize->instr);
1246
1247 nir_ssa_def *arr_size =
1248 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
1249 nir_imm_int(b, stride));
1250
1251 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
1252 nir_instr_remove(&intrin->instr);
1253 }
1254
1255 static bool
1256 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
1257 nir_address_format addr_format)
1258 {
1259 bool progress = false;
1260
1261 nir_builder b;
1262 nir_builder_init(&b, impl);
1263
1264 /* Walk in reverse order so that we can see the full deref chain when we
1265 * lower the access operations. We lower them assuming that the derefs
1266 * will be turned into address calculations later.
1267 */
1268 nir_foreach_block_reverse(block, impl) {
1269 nir_foreach_instr_reverse_safe(instr, block) {
1270 switch (instr->type) {
1271 case nir_instr_type_deref: {
1272 nir_deref_instr *deref = nir_instr_as_deref(instr);
1273 if (deref->mode & modes) {
1274 lower_explicit_io_deref(&b, deref, addr_format);
1275 progress = true;
1276 }
1277 break;
1278 }
1279
1280 case nir_instr_type_intrinsic: {
1281 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1282 switch (intrin->intrinsic) {
1283 case nir_intrinsic_load_deref:
1284 case nir_intrinsic_store_deref:
1285 case nir_intrinsic_deref_atomic_add:
1286 case nir_intrinsic_deref_atomic_imin:
1287 case nir_intrinsic_deref_atomic_umin:
1288 case nir_intrinsic_deref_atomic_imax:
1289 case nir_intrinsic_deref_atomic_umax:
1290 case nir_intrinsic_deref_atomic_and:
1291 case nir_intrinsic_deref_atomic_or:
1292 case nir_intrinsic_deref_atomic_xor:
1293 case nir_intrinsic_deref_atomic_exchange:
1294 case nir_intrinsic_deref_atomic_comp_swap:
1295 case nir_intrinsic_deref_atomic_fadd:
1296 case nir_intrinsic_deref_atomic_fmin:
1297 case nir_intrinsic_deref_atomic_fmax:
1298 case nir_intrinsic_deref_atomic_fcomp_swap: {
1299 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1300 if (deref->mode & modes) {
1301 lower_explicit_io_access(&b, intrin, addr_format);
1302 progress = true;
1303 }
1304 break;
1305 }
1306
1307 case nir_intrinsic_deref_buffer_array_length: {
1308 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1309 if (deref->mode & modes) {
1310 lower_explicit_io_array_length(&b, intrin, addr_format);
1311 progress = true;
1312 }
1313 break;
1314 }
1315
1316 default:
1317 break;
1318 }
1319 break;
1320 }
1321
1322 default:
1323 /* Nothing to do */
1324 break;
1325 }
1326 }
1327 }
1328
1329 if (progress) {
1330 nir_metadata_preserve(impl, nir_metadata_block_index |
1331 nir_metadata_dominance);
1332 }
1333
1334 return progress;
1335 }
1336
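/* A rough usage sketch; the address format is chosen by the driver, and
 * this particular combination is only illustrative:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_32bit_index_offset);
 */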
1337 bool
1338 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1339 nir_address_format addr_format)
1340 {
1341 bool progress = false;
1342
1343 nir_foreach_function(function, shader) {
1344 if (function->impl &&
1345 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1346 progress = true;
1347 }
1348
1349 return progress;
1350 }
1351
1352 static bool
1353 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
1354 nir_variable_mode modes,
1355 glsl_type_size_align_func type_info)
1356 {
1357 bool progress = false;
1358
1359 nir_foreach_block(block, impl) {
1360 nir_foreach_instr(instr, block) {
1361 if (instr->type != nir_instr_type_deref)
1362 continue;
1363
1364 nir_deref_instr *deref = nir_instr_as_deref(instr);
1365 if (!(deref->mode & modes))
1366 continue;
1367
1368 unsigned size, alignment;
1369 const struct glsl_type *new_type =
1370 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
1371 if (new_type != deref->type) {
1372 progress = true;
1373 deref->type = new_type;
1374 }
1375 if (deref->deref_type == nir_deref_type_cast) {
1376 /* See also glsl_type::get_explicit_type_for_size_align() */
1377 unsigned new_stride = align(size, alignment);
1378 if (new_stride != deref->cast.ptr_stride) {
1379 deref->cast.ptr_stride = new_stride;
1380 progress = true;
1381 }
1382 }
1383 }
1384 }
1385
1386 if (progress) {
1387 nir_metadata_preserve(impl, nir_metadata_block_index |
1388 nir_metadata_dominance |
1389 nir_metadata_live_ssa_defs |
1390 nir_metadata_loop_analysis);
1391 }
1392
1393 return progress;
1394 }
1395
1396 static bool
1397 lower_vars_to_explicit(nir_shader *shader,
1398 struct exec_list *vars, nir_variable_mode mode,
1399 glsl_type_size_align_func type_info)
1400 {
1401 bool progress = false;
1402 unsigned offset = 0;
1403 nir_foreach_variable(var, vars) {
1404 unsigned size, align;
1405 const struct glsl_type *explicit_type =
1406 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
1407
1408 if (explicit_type != var->type) {
1409 progress = true;
1410 var->type = explicit_type;
1411 }
1412
1413 var->data.driver_location = ALIGN_POT(offset, align);
1414 offset = var->data.driver_location + size;
1415 }
1416
1417 if (mode == nir_var_mem_shared) {
1418 shader->info.cs.shared_size = offset;
1419 shader->num_shared = offset;
1420 }
1421
1422 return progress;
1423 }
1424
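/* Give shared and temporary variables explicitly laid-out types, assign
 * their driver_location offsets with the provided size/align callback, and
 * fix up deref types and cast strides to match.
 */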
1425 bool
1426 nir_lower_vars_to_explicit_types(nir_shader *shader,
1427 nir_variable_mode modes,
1428 glsl_type_size_align_func type_info)
1429 {
1430 /* TODO: Situations which need to be handled to support more modes:
1431 * - row-major matrices
1432 * - compact shader inputs/outputs
1433 * - interface types
1434 */
1435 nir_variable_mode supported = nir_var_mem_shared | nir_var_shader_temp | nir_var_function_temp;
1436 assert(!(modes & ~supported) && "unsupported");
1437
1438 bool progress = false;
1439
1440 if (modes & nir_var_mem_shared)
1441 progress |= lower_vars_to_explicit(shader, &shader->shared, nir_var_mem_shared, type_info);
1442 if (modes & nir_var_shader_temp)
1443 progress |= lower_vars_to_explicit(shader, &shader->globals, nir_var_shader_temp, type_info);
1444
1445 nir_foreach_function(function, shader) {
1446 if (function->impl) {
1447 if (modes & nir_var_function_temp)
1448 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
1449
1450 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
1451 }
1452 }
1453
1454 return progress;
1455 }
1456
1457 /**
1458 * Return the offset source for a load/store intrinsic.
1459 */
1460 nir_src *
1461 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1462 {
1463 switch (instr->intrinsic) {
1464 case nir_intrinsic_load_input:
1465 case nir_intrinsic_load_output:
1466 case nir_intrinsic_load_shared:
1467 case nir_intrinsic_load_uniform:
1468 case nir_intrinsic_load_global:
1469 case nir_intrinsic_load_scratch:
1470 case nir_intrinsic_load_fs_input_interp_deltas:
1471 return &instr->src[0];
1472 case nir_intrinsic_load_ubo:
1473 case nir_intrinsic_load_ssbo:
1474 case nir_intrinsic_load_per_vertex_input:
1475 case nir_intrinsic_load_per_vertex_output:
1476 case nir_intrinsic_load_interpolated_input:
1477 case nir_intrinsic_store_output:
1478 case nir_intrinsic_store_shared:
1479 case nir_intrinsic_store_global:
1480 case nir_intrinsic_store_scratch:
1481 return &instr->src[1];
1482 case nir_intrinsic_store_ssbo:
1483 case nir_intrinsic_store_per_vertex_output:
1484 return &instr->src[2];
1485 default:
1486 return NULL;
1487 }
1488 }
1489
1490 /**
1491 * Return the vertex index source for a load/store per_vertex intrinsic.
1492 */
1493 nir_src *
1494 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1495 {
1496 switch (instr->intrinsic) {
1497 case nir_intrinsic_load_per_vertex_input:
1498 case nir_intrinsic_load_per_vertex_output:
1499 return &instr->src[0];
1500 case nir_intrinsic_store_per_vertex_output:
1501 return &instr->src[1];
1502 default:
1503 return NULL;
1504 }
1505 }
1506
1507 /**
1508 * Return the numeric constant that identify a NULL pointer for each address
1509 * format.
1510 */
1511 const nir_const_value *
1512 nir_address_format_null_value(nir_address_format addr_format)
1513 {
1514 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
1515 [nir_address_format_32bit_global] = {{0}},
1516 [nir_address_format_64bit_global] = {{0}},
1517 [nir_address_format_64bit_bounded_global] = {{0}},
1518 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
1519 [nir_address_format_32bit_offset] = {{.u32 = ~0}},
1520 [nir_address_format_logical] = {{.u32 = ~0}},
1521 };
1522
1523 assert(addr_format < ARRAY_SIZE(null_values));
1524 return null_values[addr_format];
1525 }
1526
1527 nir_ssa_def *
1528 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1529 nir_address_format addr_format)
1530 {
1531 switch (addr_format) {
1532 case nir_address_format_32bit_global:
1533 case nir_address_format_64bit_global:
1534 case nir_address_format_64bit_bounded_global:
1535 case nir_address_format_32bit_index_offset:
1536 case nir_address_format_32bit_offset:
1537 return nir_ball_iequal(b, addr0, addr1);
1538
1539 case nir_address_format_logical:
1540 unreachable("Unsupported address format");
1541 }
1542
1543 unreachable("Invalid address format");
1544 }
1545
1546 nir_ssa_def *
1547 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1548 nir_address_format addr_format)
1549 {
1550 switch (addr_format) {
1551 case nir_address_format_32bit_global:
1552 case nir_address_format_64bit_global:
1553 case nir_address_format_32bit_offset:
1554 assert(addr0->num_components == 1);
1555 assert(addr1->num_components == 1);
1556 return nir_isub(b, addr0, addr1);
1557
1558 case nir_address_format_64bit_bounded_global:
1559 return nir_isub(b, addr_to_global(b, addr0, addr_format),
1560 addr_to_global(b, addr1, addr_format));
1561
1562 case nir_address_format_32bit_index_offset:
1563 assert(addr0->num_components == 2);
1564 assert(addr1->num_components == 2);
1565 /* Assume the same buffer index. */
1566 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
1567
1568 case nir_address_format_logical:
1569 unreachable("Unsupported address format");
1570 }
1571
1572 unreachable("Invalid address format");
1573 }
1574
1575 static bool
1576 is_input(nir_intrinsic_instr *intrin)
1577 {
1578 return intrin->intrinsic == nir_intrinsic_load_input ||
1579 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
1580 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
1581 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
1582 }
1583
1584 static bool
1585 is_output(nir_intrinsic_instr *intrin)
1586 {
1587 return intrin->intrinsic == nir_intrinsic_load_output ||
1588 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
1589 intrin->intrinsic == nir_intrinsic_store_output ||
1590 intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
1591 }
1592
1593
1594 /**
1595 * This pass adds constant offsets to instr->const_index[0] for input/output
1596 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
1597 * unchanged - since we don't know what part of a compound variable is
1598 * accessed, we allocate storage for the entire thing. For drivers that use
1599 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
1600 * the offset source will be 0, so that they don't have to add it in manually.
1601 */
1602
1603 static bool
1604 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
1605 nir_variable_mode mode)
1606 {
1607 bool progress = false;
1608 nir_foreach_instr_safe(instr, block) {
1609 if (instr->type != nir_instr_type_intrinsic)
1610 continue;
1611
1612 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1613
1614 if ((mode == nir_var_shader_in && is_input(intrin)) ||
1615 (mode == nir_var_shader_out && is_output(intrin))) {
1616 nir_src *offset = nir_get_io_offset_src(intrin);
1617
1618 if (nir_src_is_const(*offset)) {
1619 intrin->const_index[0] += nir_src_as_uint(*offset);
1620 b->cursor = nir_before_instr(&intrin->instr);
1621 nir_instr_rewrite_src(&intrin->instr, offset,
1622 nir_src_for_ssa(nir_imm_int(b, 0)));
1623 progress = true;
1624 }
1625 }
1626 }
1627
1628 return progress;
1629 }
1630
1631 bool
1632 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
1633 {
1634 bool progress = false;
1635
1636 nir_foreach_function(f, nir) {
1637 if (f->impl) {
1638 nir_builder b;
1639 nir_builder_init(&b, f->impl);
1640 nir_foreach_block(block, f->impl) {
1641 progress |= add_const_offset_to_base_block(block, &b, mode);
1642 }
1643 }
1644 }
1645
1646 return progress;
1647 }
1648