src/compiler/nir/nir_lower_io.c  (mesa.git @ 6a0e18144131497a26ceea0583c74e5f9b7914b7)
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30  * This lowering pass converts loads and stores of input/output variables
31  * into the corresponding input/output intrinsics.
32 */
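/*
 * A minimal usage sketch (not part of this file): a driver typically assigns
 * driver_locations first and then runs the pass with its own type_size
 * callback.  The callback name and the variable-list fields used here are
 * illustrative assumptions, not something this file defines:
 *
 *    static int
 *    my_type_size_vec4(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_assign_var_locations(&shader->inputs, &shader->num_inputs,
 *                             my_type_size_vec4);
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 my_type_size_vec4, 0);
 */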
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 #include "util/u_math.h"
39
40 struct lower_io_state {
41 void *dead_ctx;
42 nir_builder builder;
43 int (*type_size)(const struct glsl_type *type, bool);
44 nir_variable_mode modes;
45 nir_lower_io_options options;
46 };
47
48 static nir_intrinsic_op
49 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
50 {
51 switch (deref_op) {
52 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
53 OP(atomic_exchange)
54 OP(atomic_comp_swap)
55 OP(atomic_add)
56 OP(atomic_imin)
57 OP(atomic_umin)
58 OP(atomic_imax)
59 OP(atomic_umax)
60 OP(atomic_and)
61 OP(atomic_or)
62 OP(atomic_xor)
63 OP(atomic_fadd)
64 OP(atomic_fmin)
65 OP(atomic_fmax)
66 OP(atomic_fcomp_swap)
67 #undef OP
68 default:
69 unreachable("Invalid SSBO atomic");
70 }
71 }
72
73 static nir_intrinsic_op
74 global_atomic_for_deref(nir_intrinsic_op deref_op)
75 {
76 switch (deref_op) {
77 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
78 OP(atomic_exchange)
79 OP(atomic_comp_swap)
80 OP(atomic_add)
81 OP(atomic_imin)
82 OP(atomic_umin)
83 OP(atomic_imax)
84 OP(atomic_umax)
85 OP(atomic_and)
86 OP(atomic_or)
87 OP(atomic_xor)
88 OP(atomic_fadd)
89 OP(atomic_fmin)
90 OP(atomic_fmax)
91 OP(atomic_fcomp_swap)
92 #undef OP
93 default:
94       unreachable("Invalid global atomic");
95 }
96 }
97
98 static nir_intrinsic_op
99 shared_atomic_for_deref(nir_intrinsic_op deref_op)
100 {
101 switch (deref_op) {
102 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
103 OP(atomic_exchange)
104 OP(atomic_comp_swap)
105 OP(atomic_add)
106 OP(atomic_imin)
107 OP(atomic_umin)
108 OP(atomic_imax)
109 OP(atomic_umax)
110 OP(atomic_and)
111 OP(atomic_or)
112 OP(atomic_xor)
113 OP(atomic_fadd)
114 OP(atomic_fmin)
115 OP(atomic_fmax)
116 OP(atomic_fcomp_swap)
117 #undef OP
118 default:
119 unreachable("Invalid shared atomic");
120 }
121 }
122
123 void
124 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
125 int (*type_size)(const struct glsl_type *, bool))
126 {
127 unsigned location = 0;
128
129 nir_foreach_variable(var, var_list) {
130 /*
131        * UBOs and SSBOs have their own address spaces, so don't count them
132        * towards the number of global uniforms.
133 */
134 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
135 continue;
136
137 var->data.driver_location = location;
138 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
139 var->data.mode == nir_var_shader_out ||
140 var->data.bindless;
141 location += type_size(var->type, bindless_type_size);
142 }
143
144 *size = location;
145 }
146
147 /**
148  * Return true if the given variable is a per-vertex input/output array
149  * (such as geometry shader inputs).
150 */
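/*
 * For example, a geometry shader input declared in GLSL as
 *
 *    in vec4 color[];     // one vec4 per input vertex
 *
 * reaches NIR as an array whose outermost dimension selects the vertex, so
 * this function returns true for it.  An ordinary vertex shader input of
 * array type returns false, since only the stages checked below address
 * their I/O per vertex.
 */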
151 bool
152 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
153 {
154 if (var->data.patch || !glsl_type_is_array(var->type))
155 return false;
156
157 if (var->data.mode == nir_var_shader_in)
158 return stage == MESA_SHADER_GEOMETRY ||
159 stage == MESA_SHADER_TESS_CTRL ||
160 stage == MESA_SHADER_TESS_EVAL;
161
162 if (var->data.mode == nir_var_shader_out)
163 return stage == MESA_SHADER_TESS_CTRL;
164
165 return false;
166 }
167
168 static nir_ssa_def *
169 get_io_offset(nir_builder *b, nir_deref_instr *deref,
170 nir_ssa_def **vertex_index,
171 int (*type_size)(const struct glsl_type *, bool),
172 unsigned *component, bool bts)
173 {
174 nir_deref_path path;
175 nir_deref_path_init(&path, deref, NULL);
176
177 assert(path.path[0]->deref_type == nir_deref_type_var);
178 nir_deref_instr **p = &path.path[1];
179
180 /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
181 * outermost array index separate. Process the rest normally.
182 */
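   /*
    * E.g. for a deref chain  var -> [vtx] -> [i]  on a per-vertex array, the
    * [vtx] index is returned through *vertex_index and the loop below folds
    * [i] (plus any further array/struct derefs) into the offset, measured in
    * type_size() units.
    */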
183 if (vertex_index != NULL) {
184 assert((*p)->deref_type == nir_deref_type_array);
185 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
186 p++;
187 }
188
189 if (path.path[0]->var->data.compact) {
190 assert((*p)->deref_type == nir_deref_type_array);
191 assert(glsl_type_is_scalar((*p)->type));
192
193 /* We always lower indirect dereferences for "compact" array vars. */
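      /* Worked example of the computation below: with *component == 0, a
       * gl_ClipDistance[5] access (a compact float[8] variable) gives
       * total_offset 5, so slot_offset is 1 and *component becomes 1, i.e.
       * the .y channel of the second vec4-sized slot.
       */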
194 const unsigned index = nir_src_as_uint((*p)->arr.index);
195 const unsigned total_offset = *component + index;
196 const unsigned slot_offset = total_offset / 4;
197 *component = total_offset % 4;
198 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
199 }
200
201 /* Just emit code and let constant-folding go to town */
202 nir_ssa_def *offset = nir_imm_int(b, 0);
203
204 for (; *p; p++) {
205 if ((*p)->deref_type == nir_deref_type_array) {
206 unsigned size = type_size((*p)->type, bts);
207
208 nir_ssa_def *mul =
209 nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
210
211 offset = nir_iadd(b, offset, mul);
212 } else if ((*p)->deref_type == nir_deref_type_struct) {
213 /* p starts at path[1], so this is safe */
214 nir_deref_instr *parent = *(p - 1);
215
216 unsigned field_offset = 0;
217 for (unsigned i = 0; i < (*p)->strct.index; i++) {
218 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
219 }
220 offset = nir_iadd_imm(b, offset, field_offset);
221 } else {
222 unreachable("Unsupported deref type");
223 }
224 }
225
226 nir_deref_path_finish(&path);
227
228 return offset;
229 }
230
231 static nir_ssa_def *
232 emit_load(struct lower_io_state *state,
233 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
234 unsigned component, unsigned num_components, unsigned bit_size,
235 nir_alu_type type)
236 {
237 nir_builder *b = &state->builder;
238 const nir_shader *nir = b->shader;
239 nir_variable_mode mode = var->data.mode;
240 nir_ssa_def *barycentric = NULL;
241
242 nir_intrinsic_op op;
243 switch (mode) {
244 case nir_var_shader_in:
245 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
246 nir->options->use_interpolated_input_intrinsics &&
247 var->data.interpolation != INTERP_MODE_FLAT) {
248 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
249 assert(vertex_index != NULL);
250 op = nir_intrinsic_load_input_vertex;
251 } else {
252 assert(vertex_index == NULL);
253
254 nir_intrinsic_op bary_op;
255 if (var->data.sample ||
256 (state->options & nir_lower_io_force_sample_interpolation))
257 bary_op = nir_intrinsic_load_barycentric_sample;
258 else if (var->data.centroid)
259 bary_op = nir_intrinsic_load_barycentric_centroid;
260 else
261 bary_op = nir_intrinsic_load_barycentric_pixel;
262
263 barycentric = nir_load_barycentric(&state->builder, bary_op,
264 var->data.interpolation);
265 op = nir_intrinsic_load_interpolated_input;
266 }
267 } else {
268 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
269 nir_intrinsic_load_input;
270 }
271 break;
272 case nir_var_shader_out:
273 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
274 nir_intrinsic_load_output;
275 break;
276 case nir_var_uniform:
277 op = nir_intrinsic_load_uniform;
278 break;
279 case nir_var_mem_shared:
280 op = nir_intrinsic_load_shared;
281 break;
282 default:
283 unreachable("Unknown variable mode");
284 }
285
286 nir_intrinsic_instr *load =
287 nir_intrinsic_instr_create(state->builder.shader, op);
288 load->num_components = num_components;
289
290 nir_intrinsic_set_base(load, var->data.driver_location);
291 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
292 nir_intrinsic_set_component(load, component);
293
294 if (load->intrinsic == nir_intrinsic_load_uniform)
295 nir_intrinsic_set_range(load,
296 state->type_size(var->type, var->data.bindless));
297
298 if (load->intrinsic == nir_intrinsic_load_input ||
299 load->intrinsic == nir_intrinsic_load_input_vertex ||
300 load->intrinsic == nir_intrinsic_load_uniform)
301 nir_intrinsic_set_type(load, type);
302
303 if (vertex_index) {
304 load->src[0] = nir_src_for_ssa(vertex_index);
305 load->src[1] = nir_src_for_ssa(offset);
306 } else if (barycentric) {
307 load->src[0] = nir_src_for_ssa(barycentric);
308 load->src[1] = nir_src_for_ssa(offset);
309 } else {
310 load->src[0] = nir_src_for_ssa(offset);
311 }
312
313 nir_ssa_dest_init(&load->instr, &load->dest,
314 num_components, bit_size, NULL);
315 nir_builder_instr_insert(b, &load->instr);
316
317 return &load->dest.ssa;
318 }
319
320 static nir_ssa_def *
321 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
322 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
323 unsigned component, const struct glsl_type *type)
324 {
325 assert(intrin->dest.is_ssa);
326 if (intrin->dest.ssa.bit_size == 64 &&
327 (state->options & nir_lower_io_lower_64bit_to_32)) {
328 nir_builder *b = &state->builder;
329
330 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
331
332 nir_ssa_def *comp64[4];
333 assert(component == 0 || component == 2);
334 unsigned dest_comp = 0;
335 while (dest_comp < intrin->dest.ssa.num_components) {
336 const unsigned num_comps =
337 MIN2(intrin->dest.ssa.num_components - dest_comp,
338 (4 - component) / 2);
339
340 nir_ssa_def *data32 =
341 emit_load(state, vertex_index, var, offset, component,
342 num_comps * 2, 32, nir_type_uint32);
343 for (unsigned i = 0; i < num_comps; i++) {
344 comp64[dest_comp + i] =
345 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
346 }
347
348          /* Only the first load has a component offset */
349 component = 0;
350 dest_comp += num_comps;
351 offset = nir_iadd_imm(b, offset, slot_size);
352 }
353
354 return nir_vec(b, comp64, intrin->dest.ssa.num_components);
355 } else if (intrin->dest.ssa.bit_size == 1) {
356 /* Booleans are 32-bit */
357 assert(glsl_type_is_boolean(type));
358 return nir_b2b1(&state->builder,
359 emit_load(state, vertex_index, var, offset, component,
360 intrin->dest.ssa.num_components, 32,
361 nir_type_bool32));
362 } else {
363 return emit_load(state, vertex_index, var, offset, component,
364 intrin->dest.ssa.num_components,
365 intrin->dest.ssa.bit_size,
366 nir_get_nir_type_for_glsl_type(type));
367 }
368 }
369
370 static void
371 emit_store(struct lower_io_state *state, nir_ssa_def *data,
372 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
373 unsigned component, unsigned num_components,
374 nir_component_mask_t write_mask, nir_alu_type type)
375 {
376 nir_builder *b = &state->builder;
377 nir_variable_mode mode = var->data.mode;
378
379 nir_intrinsic_op op;
380 if (mode == nir_var_mem_shared) {
381 op = nir_intrinsic_store_shared;
382 } else {
383 assert(mode == nir_var_shader_out);
384 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
385 nir_intrinsic_store_output;
386 }
387
388 nir_intrinsic_instr *store =
389 nir_intrinsic_instr_create(state->builder.shader, op);
390 store->num_components = num_components;
391
392 store->src[0] = nir_src_for_ssa(data);
393
394 nir_intrinsic_set_base(store, var->data.driver_location);
395
396 if (mode == nir_var_shader_out)
397 nir_intrinsic_set_component(store, component);
398
399 if (store->intrinsic == nir_intrinsic_store_output)
400 nir_intrinsic_set_type(store, type);
401
402 nir_intrinsic_set_write_mask(store, write_mask);
403
404 if (vertex_index)
405 store->src[1] = nir_src_for_ssa(vertex_index);
406
407 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
408
409 nir_builder_instr_insert(b, &store->instr);
410 }
411
412 static void
413 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
414 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
415 unsigned component, const struct glsl_type *type)
416 {
417 assert(intrin->src[1].is_ssa);
418 if (intrin->src[1].ssa->bit_size == 64 &&
419 (state->options & nir_lower_io_lower_64bit_to_32)) {
420 nir_builder *b = &state->builder;
421
422 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
423
424 assert(component == 0 || component == 2);
425 unsigned src_comp = 0;
426 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
427 while (src_comp < intrin->num_components) {
428 const unsigned num_comps =
429 MIN2(intrin->num_components - src_comp,
430 (4 - component) / 2);
431
432 if (write_mask & BITFIELD_MASK(num_comps)) {
433 nir_ssa_def *data =
434 nir_channels(b, intrin->src[1].ssa,
435 BITFIELD_RANGE(src_comp, num_comps));
436 nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
437
438 nir_component_mask_t write_mask32 = 0;
439 for (unsigned i = 0; i < num_comps; i++) {
440 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
441 write_mask32 |= 3 << (i * 2);
442 }
443
444 emit_store(state, data32, vertex_index, var, offset,
445 component, data32->num_components, write_mask32,
446 nir_type_uint32);
447 }
448
449 /* Only the first store has a component offset */
450 component = 0;
451 src_comp += num_comps;
452 write_mask >>= num_comps;
453 offset = nir_iadd_imm(b, offset, slot_size);
454 }
455    } else if (intrin->src[1].ssa->bit_size == 1) {
456 /* Booleans are 32-bit */
457 assert(glsl_type_is_boolean(type));
458 nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
459 emit_store(state, b32_val, vertex_index, var, offset,
460 component, intrin->num_components,
461 nir_intrinsic_write_mask(intrin),
462 nir_type_bool32);
463 } else {
464 emit_store(state, intrin->src[1].ssa, vertex_index, var, offset,
465 component, intrin->num_components,
466 nir_intrinsic_write_mask(intrin),
467 nir_get_nir_type_for_glsl_type(type));
468 }
469 }
470
471 static nir_ssa_def *
472 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
473 nir_variable *var, nir_ssa_def *offset)
474 {
475 nir_builder *b = &state->builder;
476 assert(var->data.mode == nir_var_mem_shared);
477
478 nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
479
480 nir_intrinsic_instr *atomic =
481 nir_intrinsic_instr_create(state->builder.shader, op);
482
483 nir_intrinsic_set_base(atomic, var->data.driver_location);
484
485 atomic->src[0] = nir_src_for_ssa(offset);
486 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
487 nir_intrinsic_infos[op].num_srcs);
488 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
489 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
490 }
491
492 if (nir_intrinsic_infos[op].has_dest) {
493 assert(intrin->dest.is_ssa);
494 assert(nir_intrinsic_infos[intrin->intrinsic].has_dest);
495 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
496 intrin->dest.ssa.num_components,
497 intrin->dest.ssa.bit_size, NULL);
498 }
499
500 nir_builder_instr_insert(b, &atomic->instr);
501
502 return nir_intrinsic_infos[op].has_dest ? &atomic->dest.ssa : NULL;
503 }
504
505 static nir_ssa_def *
506 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
507 nir_variable *var, nir_ssa_def *offset, unsigned component,
508 const struct glsl_type *type)
509 {
510 nir_builder *b = &state->builder;
511 assert(var->data.mode == nir_var_shader_in);
512
513 /* Ignore interpolateAt() for flat variables - flat is flat. Lower
514 * interpolateAtVertex() for explicit variables.
515 */
516 if (var->data.interpolation == INTERP_MODE_FLAT ||
517 var->data.interpolation == INTERP_MODE_EXPLICIT) {
518 nir_ssa_def *vertex_index = NULL;
519
520 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
521 assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
522 vertex_index = intrin->src[1].ssa;
523 }
524
525 return lower_load(intrin, state, vertex_index, var, offset, component, type);
526 }
527
528    /* None of the supported APIs allow interpolation on 64-bit values */
529 assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
530
531 nir_intrinsic_op bary_op;
532 switch (intrin->intrinsic) {
533 case nir_intrinsic_interp_deref_at_centroid:
534 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
535 nir_intrinsic_load_barycentric_sample :
536 nir_intrinsic_load_barycentric_centroid;
537 break;
538 case nir_intrinsic_interp_deref_at_sample:
539 bary_op = nir_intrinsic_load_barycentric_at_sample;
540 break;
541 case nir_intrinsic_interp_deref_at_offset:
542 bary_op = nir_intrinsic_load_barycentric_at_offset;
543 break;
544 default:
545 unreachable("Bogus interpolateAt() intrinsic.");
546 }
547
548 nir_intrinsic_instr *bary_setup =
549 nir_intrinsic_instr_create(state->builder.shader, bary_op);
550
551 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
552 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
553
554 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
555 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
556 intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
557 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
558
559 nir_builder_instr_insert(b, &bary_setup->instr);
560
561 nir_intrinsic_instr *load =
562 nir_intrinsic_instr_create(state->builder.shader,
563 nir_intrinsic_load_interpolated_input);
564 load->num_components = intrin->num_components;
565
566 nir_intrinsic_set_base(load, var->data.driver_location);
567 nir_intrinsic_set_component(load, component);
568
569 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
570 load->src[1] = nir_src_for_ssa(offset);
571
572 assert(intrin->dest.is_ssa);
573 nir_ssa_dest_init(&load->instr, &load->dest,
574 intrin->dest.ssa.num_components,
575 intrin->dest.ssa.bit_size, NULL);
576 nir_builder_instr_insert(b, &load->instr);
577
578 return &load->dest.ssa;
579 }
580
581 static bool
582 nir_lower_io_block(nir_block *block,
583 struct lower_io_state *state)
584 {
585 nir_builder *b = &state->builder;
586 const nir_shader_compiler_options *options = b->shader->options;
587 bool progress = false;
588
589 nir_foreach_instr_safe(instr, block) {
590 if (instr->type != nir_instr_type_intrinsic)
591 continue;
592
593 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
594
595 switch (intrin->intrinsic) {
596 case nir_intrinsic_load_deref:
597 case nir_intrinsic_store_deref:
598 case nir_intrinsic_deref_atomic_add:
599 case nir_intrinsic_deref_atomic_imin:
600 case nir_intrinsic_deref_atomic_umin:
601 case nir_intrinsic_deref_atomic_imax:
602 case nir_intrinsic_deref_atomic_umax:
603 case nir_intrinsic_deref_atomic_and:
604 case nir_intrinsic_deref_atomic_or:
605 case nir_intrinsic_deref_atomic_xor:
606 case nir_intrinsic_deref_atomic_exchange:
607 case nir_intrinsic_deref_atomic_comp_swap:
608 case nir_intrinsic_deref_atomic_fadd:
609 case nir_intrinsic_deref_atomic_fmin:
610 case nir_intrinsic_deref_atomic_fmax:
611 case nir_intrinsic_deref_atomic_fcomp_swap:
612          /* We can lower the I/O for this NIR intrinsic */
613 break;
614 case nir_intrinsic_interp_deref_at_centroid:
615 case nir_intrinsic_interp_deref_at_sample:
616 case nir_intrinsic_interp_deref_at_offset:
617 case nir_intrinsic_interp_deref_at_vertex:
618          /* We can optionally lower these to load_interpolated_input; if not, fall through and skip them like any other intrinsic */
619 if (options->use_interpolated_input_intrinsics)
620 break;
621 default:
622          /* We can't lower the I/O for this NIR intrinsic, so skip it */
623 continue;
624 }
625
626 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
627
628 nir_variable_mode mode = deref->mode;
629
630 if ((state->modes & mode) == 0)
631 continue;
632
633 if (mode != nir_var_shader_in &&
634 mode != nir_var_shader_out &&
635 mode != nir_var_mem_shared &&
636 mode != nir_var_uniform)
637 continue;
638
639 nir_variable *var = nir_deref_instr_get_variable(deref);
640
641 b->cursor = nir_before_instr(instr);
642
643 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
644
645 nir_ssa_def *offset;
646 nir_ssa_def *vertex_index = NULL;
647 unsigned component_offset = var->data.location_frac;
648 bool bindless_type_size = mode == nir_var_shader_in ||
649 mode == nir_var_shader_out ||
650 var->data.bindless;
651
652 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
653 state->type_size, &component_offset,
654 bindless_type_size);
655
656 nir_ssa_def *replacement = NULL;
657
658 switch (intrin->intrinsic) {
659 case nir_intrinsic_load_deref:
660 replacement = lower_load(intrin, state, vertex_index, var, offset,
661 component_offset, deref->type);
662 break;
663
664 case nir_intrinsic_store_deref:
665 lower_store(intrin, state, vertex_index, var, offset,
666 component_offset, deref->type);
667 break;
668
669 case nir_intrinsic_deref_atomic_add:
670 case nir_intrinsic_deref_atomic_imin:
671 case nir_intrinsic_deref_atomic_umin:
672 case nir_intrinsic_deref_atomic_imax:
673 case nir_intrinsic_deref_atomic_umax:
674 case nir_intrinsic_deref_atomic_and:
675 case nir_intrinsic_deref_atomic_or:
676 case nir_intrinsic_deref_atomic_xor:
677 case nir_intrinsic_deref_atomic_exchange:
678 case nir_intrinsic_deref_atomic_comp_swap:
679 case nir_intrinsic_deref_atomic_fadd:
680 case nir_intrinsic_deref_atomic_fmin:
681 case nir_intrinsic_deref_atomic_fmax:
682 case nir_intrinsic_deref_atomic_fcomp_swap:
683 assert(vertex_index == NULL);
684 replacement = lower_atomic(intrin, state, var, offset);
685 break;
686
687 case nir_intrinsic_interp_deref_at_centroid:
688 case nir_intrinsic_interp_deref_at_sample:
689 case nir_intrinsic_interp_deref_at_offset:
690 case nir_intrinsic_interp_deref_at_vertex:
691 assert(vertex_index == NULL);
692 replacement = lower_interpolate_at(intrin, state, var, offset,
693 component_offset, deref->type);
694 break;
695
696 default:
697 continue;
698 }
699
700 if (replacement) {
701 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
702 nir_src_for_ssa(replacement));
703 }
704 nir_instr_remove(&intrin->instr);
705 progress = true;
706 }
707
708 return progress;
709 }
710
711 static bool
712 nir_lower_io_impl(nir_function_impl *impl,
713 nir_variable_mode modes,
714 int (*type_size)(const struct glsl_type *, bool),
715 nir_lower_io_options options)
716 {
717 struct lower_io_state state;
718 bool progress = false;
719
720 nir_builder_init(&state.builder, impl);
721 state.dead_ctx = ralloc_context(NULL);
722 state.modes = modes;
723 state.type_size = type_size;
724 state.options = options;
725
726 nir_foreach_block(block, impl) {
727 progress |= nir_lower_io_block(block, &state);
728 }
729
730 ralloc_free(state.dead_ctx);
731
732 nir_metadata_preserve(impl, nir_metadata_block_index |
733 nir_metadata_dominance);
734 return progress;
735 }
736
737 bool
738 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
739 int (*type_size)(const struct glsl_type *, bool),
740 nir_lower_io_options options)
741 {
742 bool progress = false;
743
744 nir_foreach_function(function, shader) {
745 if (function->impl) {
746 progress |= nir_lower_io_impl(function->impl, modes,
747 type_size, options);
748 }
749 }
750
751 return progress;
752 }
753
754 static unsigned
755 type_scalar_size_bytes(const struct glsl_type *type)
756 {
757 assert(glsl_type_is_vector_or_scalar(type) ||
758 glsl_type_is_matrix(type));
759 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
760 }
761
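/*
 * Layout of the address vectors handled by the helpers below (derived from
 * how this file uses each component):
 *
 *    32bit_global / 64bit_global / 32bit_offset : scalar address or offset
 *    32bit_index_offset                         : vec2(index, offset)
 *    vec2_index_32bit_offset                    : vec3(index.x, index.y, offset)
 *    64bit_bounded_global                       : vec4(addr_lo, addr_hi,
 *                                                      size, offset)
 */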
762 static nir_ssa_def *
763 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
764 nir_address_format addr_format, nir_ssa_def *offset)
765 {
766 assert(offset->num_components == 1);
767 assert(addr->bit_size == offset->bit_size);
768
769 switch (addr_format) {
770 case nir_address_format_32bit_global:
771 case nir_address_format_64bit_global:
772 case nir_address_format_32bit_offset:
773 assert(addr->num_components == 1);
774 return nir_iadd(b, addr, offset);
775
776 case nir_address_format_64bit_bounded_global:
777 assert(addr->num_components == 4);
778 return nir_vec4(b, nir_channel(b, addr, 0),
779 nir_channel(b, addr, 1),
780 nir_channel(b, addr, 2),
781 nir_iadd(b, nir_channel(b, addr, 3), offset));
782
783 case nir_address_format_32bit_index_offset:
784 assert(addr->num_components == 2);
785 return nir_vec2(b, nir_channel(b, addr, 0),
786 nir_iadd(b, nir_channel(b, addr, 1), offset));
787 case nir_address_format_vec2_index_32bit_offset:
788 assert(addr->num_components == 3);
789 return nir_vec3(b, nir_channel(b, addr, 0), nir_channel(b, addr, 1),
790 nir_iadd(b, nir_channel(b, addr, 2), offset));
791 case nir_address_format_logical:
792 unreachable("Unsupported address format");
793 }
794 unreachable("Invalid address format");
795 }
796
797 static nir_ssa_def *
798 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
799 nir_address_format addr_format, int64_t offset)
800 {
801 return build_addr_iadd(b, addr, addr_format,
802 nir_imm_intN_t(b, offset, addr->bit_size));
803 }
804
805 static nir_ssa_def *
806 addr_to_index(nir_builder *b, nir_ssa_def *addr,
807 nir_address_format addr_format)
808 {
809 if (addr_format == nir_address_format_32bit_index_offset) {
810 assert(addr->num_components == 2);
811 return nir_channel(b, addr, 0);
812 } else if (addr_format == nir_address_format_vec2_index_32bit_offset) {
813 assert(addr->num_components == 3);
814 return nir_channels(b, addr, 0x3);
815 } else {
816 unreachable("bad address format for index");
817 }
818 }
819
820 static nir_ssa_def *
821 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
822 nir_address_format addr_format)
823 {
824 if (addr_format == nir_address_format_32bit_index_offset) {
825 assert(addr->num_components == 2);
826 return nir_channel(b, addr, 1);
827 } else if (addr_format == nir_address_format_vec2_index_32bit_offset) {
828 assert(addr->num_components == 3);
829 return nir_channel(b, addr, 2);
830 } else {
831 unreachable("bad address format for offset");
832 }
833 }
834
835 /** Returns true if the given address format resolves to a global address */
836 static bool
837 addr_format_is_global(nir_address_format addr_format)
838 {
839 return addr_format == nir_address_format_32bit_global ||
840 addr_format == nir_address_format_64bit_global ||
841 addr_format == nir_address_format_64bit_bounded_global;
842 }
843
844 static nir_ssa_def *
845 addr_to_global(nir_builder *b, nir_ssa_def *addr,
846 nir_address_format addr_format)
847 {
848 switch (addr_format) {
849 case nir_address_format_32bit_global:
850 case nir_address_format_64bit_global:
851 assert(addr->num_components == 1);
852 return addr;
853
854 case nir_address_format_64bit_bounded_global:
855 assert(addr->num_components == 4);
856 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
857 nir_u2u64(b, nir_channel(b, addr, 3)));
858
859 case nir_address_format_32bit_index_offset:
860 case nir_address_format_vec2_index_32bit_offset:
861 case nir_address_format_32bit_offset:
862 case nir_address_format_logical:
863 unreachable("Cannot get a 64-bit address with this address format");
864 }
865
866 unreachable("Invalid address format");
867 }
868
869 static bool
870 addr_format_needs_bounds_check(nir_address_format addr_format)
871 {
872 return addr_format == nir_address_format_64bit_bounded_global;
873 }
874
875 static nir_ssa_def *
876 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
877 nir_address_format addr_format, unsigned size)
878 {
879 assert(addr_format == nir_address_format_64bit_bounded_global);
880 assert(addr->num_components == 4);
881 return nir_ige(b, nir_channel(b, addr, 2),
882 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
883 }
884
885 static nir_ssa_def *
886 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
887 nir_ssa_def *addr, nir_address_format addr_format,
888 unsigned num_components)
889 {
890 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
891
892 nir_intrinsic_op op;
893 switch (mode) {
894 case nir_var_mem_ubo:
895 op = nir_intrinsic_load_ubo;
896 break;
897 case nir_var_mem_ssbo:
898 if (addr_format_is_global(addr_format))
899 op = nir_intrinsic_load_global;
900 else
901 op = nir_intrinsic_load_ssbo;
902 break;
903 case nir_var_mem_global:
904 assert(addr_format_is_global(addr_format));
905 op = nir_intrinsic_load_global;
906 break;
907 case nir_var_shader_in:
908 assert(addr_format_is_global(addr_format));
909 op = nir_intrinsic_load_kernel_input;
910 break;
911 case nir_var_mem_shared:
912 assert(addr_format == nir_address_format_32bit_offset);
913 op = nir_intrinsic_load_shared;
914 break;
915 default:
916 unreachable("Unsupported explicit IO variable mode");
917 }
918
919 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
920
921 if (addr_format_is_global(addr_format)) {
922 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
923 } else if (addr_format == nir_address_format_32bit_offset) {
924 assert(addr->num_components == 1);
925 load->src[0] = nir_src_for_ssa(addr);
926 } else {
927 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
928 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
929 }
930
931 if (mode != nir_var_shader_in && mode != nir_var_mem_shared)
932 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
933
934 unsigned bit_size = intrin->dest.ssa.bit_size;
935 if (bit_size == 1) {
936 /* TODO: Make the native bool bit_size an option. */
937 bit_size = 32;
938 }
939
940 /* TODO: We should try and provide a better alignment. For OpenCL, we need
941 * to plumb the alignment through from SPIR-V when we have one.
942 */
943 nir_intrinsic_set_align(load, bit_size / 8, 0);
944
945 assert(intrin->dest.is_ssa);
946 load->num_components = num_components;
947 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
948 bit_size, intrin->dest.ssa.name);
949
950 assert(bit_size % 8 == 0);
951
952 nir_ssa_def *result;
953 if (addr_format_needs_bounds_check(addr_format)) {
954 /* The Vulkan spec for robustBufferAccess gives us quite a few options
955 * as to what we can do with an OOB read. Unfortunately, returning
956 * undefined values isn't one of them so we return an actual zero.
957 */
958 nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
959
960 const unsigned load_size = (bit_size / 8) * load->num_components;
961 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
962
963 nir_builder_instr_insert(b, &load->instr);
964
965 nir_pop_if(b, NULL);
966
967 result = nir_if_phi(b, &load->dest.ssa, zero);
968 } else {
969 nir_builder_instr_insert(b, &load->instr);
970 result = &load->dest.ssa;
971 }
972
973 if (intrin->dest.ssa.bit_size == 1) {
974 /* For shared, we can go ahead and use NIR's and/or the back-end's
975 * standard encoding for booleans rather than forcing a 0/1 boolean.
976 * This should save an instruction or two.
977 */
978 if (mode == nir_var_mem_shared)
979 result = nir_b2b1(b, result);
980 else
981 result = nir_i2b(b, result);
982 }
983
984 return result;
985 }
986
987 static void
988 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
989 nir_ssa_def *addr, nir_address_format addr_format,
990 nir_ssa_def *value, nir_component_mask_t write_mask)
991 {
992 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
993
994 nir_intrinsic_op op;
995 switch (mode) {
996 case nir_var_mem_ssbo:
997 if (addr_format_is_global(addr_format))
998 op = nir_intrinsic_store_global;
999 else
1000 op = nir_intrinsic_store_ssbo;
1001 break;
1002 case nir_var_mem_global:
1003 assert(addr_format_is_global(addr_format));
1004 op = nir_intrinsic_store_global;
1005 break;
1006 case nir_var_mem_shared:
1007 assert(addr_format == nir_address_format_32bit_offset);
1008 op = nir_intrinsic_store_shared;
1009 break;
1010 default:
1011 unreachable("Unsupported explicit IO variable mode");
1012 }
1013
1014 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1015
1016 if (value->bit_size == 1) {
1017 /* For shared, we can go ahead and use NIR's and/or the back-end's
1018 * standard encoding for booleans rather than forcing a 0/1 boolean.
1019 * This should save an instruction or two.
1020 *
1021 * TODO: Make the native bool bit_size an option.
1022 */
1023 if (mode == nir_var_mem_shared)
1024 value = nir_b2b32(b, value);
1025 else
1026 value = nir_b2i(b, value, 32);
1027 }
1028
1029 store->src[0] = nir_src_for_ssa(value);
1030 if (addr_format_is_global(addr_format)) {
1031 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1032 } else if (addr_format == nir_address_format_32bit_offset) {
1033 assert(addr->num_components == 1);
1034 store->src[1] = nir_src_for_ssa(addr);
1035 } else {
1036 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1037 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1038 }
1039
1040 nir_intrinsic_set_write_mask(store, write_mask);
1041
1042 if (mode != nir_var_mem_shared)
1043 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1044
1045 /* TODO: We should try and provide a better alignment. For OpenCL, we need
1046 * to plumb the alignment through from SPIR-V when we have one.
1047 */
1048 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
1049
1050 assert(value->num_components == 1 ||
1051 value->num_components == intrin->num_components);
1052 store->num_components = value->num_components;
1053
1054 assert(value->bit_size % 8 == 0);
1055
1056 if (addr_format_needs_bounds_check(addr_format)) {
1057 const unsigned store_size = (value->bit_size / 8) * store->num_components;
1058 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1059
1060 nir_builder_instr_insert(b, &store->instr);
1061
1062 nir_pop_if(b, NULL);
1063 } else {
1064 nir_builder_instr_insert(b, &store->instr);
1065 }
1066 }
1067
1068 static nir_ssa_def *
1069 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1070 nir_ssa_def *addr, nir_address_format addr_format)
1071 {
1072 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
1073 const unsigned num_data_srcs =
1074 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1075
1076 nir_intrinsic_op op;
1077 switch (mode) {
1078 case nir_var_mem_ssbo:
1079 if (addr_format_is_global(addr_format))
1080 op = global_atomic_for_deref(intrin->intrinsic);
1081 else
1082 op = ssbo_atomic_for_deref(intrin->intrinsic);
1083 break;
1084 case nir_var_mem_global:
1085 assert(addr_format_is_global(addr_format));
1086 op = global_atomic_for_deref(intrin->intrinsic);
1087 break;
1088 case nir_var_mem_shared:
1089 assert(addr_format == nir_address_format_32bit_offset);
1090 op = shared_atomic_for_deref(intrin->intrinsic);
1091 break;
1092 default:
1093 unreachable("Unsupported explicit IO variable mode");
1094 }
1095
1096 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1097
1098 unsigned src = 0;
1099 if (addr_format_is_global(addr_format)) {
1100 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1101 } else if (addr_format == nir_address_format_32bit_offset) {
1102 assert(addr->num_components == 1);
1103 atomic->src[src++] = nir_src_for_ssa(addr);
1104 } else {
1105 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1106 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1107 }
1108 for (unsigned i = 0; i < num_data_srcs; i++) {
1109 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1110 }
1111
1112 /* Global atomics don't have access flags because they assume that the
1113 * address may be non-uniform.
1114 */
1115 if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
1116 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1117
1118 assert(intrin->dest.ssa.num_components == 1);
1119 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1120 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
1121
1122 assert(atomic->dest.ssa.bit_size % 8 == 0);
1123
1124 if (addr_format_needs_bounds_check(addr_format)) {
1125 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1126 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1127
1128 nir_builder_instr_insert(b, &atomic->instr);
1129
1130 nir_pop_if(b, NULL);
1131 return nir_if_phi(b, &atomic->dest.ssa,
1132 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1133 } else {
1134 nir_builder_instr_insert(b, &atomic->instr);
1135 return &atomic->dest.ssa;
1136 }
1137 }
1138
1139 nir_ssa_def *
1140 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1141 nir_ssa_def *base_addr,
1142 nir_address_format addr_format)
1143 {
1144 assert(deref->dest.is_ssa);
1145 switch (deref->deref_type) {
1146 case nir_deref_type_var:
1147 assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
1148 return nir_imm_intN_t(b, deref->var->data.driver_location,
1149 deref->dest.ssa.bit_size);
1150
1151 case nir_deref_type_array: {
1152 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1153
1154 unsigned stride = glsl_get_explicit_stride(parent->type);
1155 if ((glsl_type_is_matrix(parent->type) &&
1156 glsl_matrix_type_is_row_major(parent->type)) ||
1157 (glsl_type_is_vector(parent->type) && stride == 0))
1158 stride = type_scalar_size_bytes(parent->type);
1159
1160 assert(stride > 0);
1161
1162 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1163 index = nir_i2i(b, index, base_addr->bit_size);
1164 return build_addr_iadd(b, base_addr, addr_format,
1165 nir_amul_imm(b, index, stride));
1166 }
1167
1168 case nir_deref_type_ptr_as_array: {
1169 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1170 index = nir_i2i(b, index, base_addr->bit_size);
1171 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
1172 return build_addr_iadd(b, base_addr, addr_format,
1173 nir_amul_imm(b, index, stride));
1174 }
1175
1176 case nir_deref_type_array_wildcard:
1177 unreachable("Wildcards should be lowered by now");
1178 break;
1179
1180 case nir_deref_type_struct: {
1181 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1182 int offset = glsl_get_struct_field_offset(parent->type,
1183 deref->strct.index);
1184 assert(offset >= 0);
1185 return build_addr_iadd_imm(b, base_addr, addr_format, offset);
1186 }
1187
1188 case nir_deref_type_cast:
1189 /* Nothing to do here */
1190 return base_addr;
1191 }
1192
1193 unreachable("Invalid NIR deref type");
1194 }
1195
1196 void
1197 nir_lower_explicit_io_instr(nir_builder *b,
1198 nir_intrinsic_instr *intrin,
1199 nir_ssa_def *addr,
1200 nir_address_format addr_format)
1201 {
1202 b->cursor = nir_after_instr(&intrin->instr);
1203
1204 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1205 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1206 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1207 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1208 assert(vec_stride == 0 || vec_stride >= scalar_size);
1209
1210 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1211 nir_ssa_def *value;
1212 if (vec_stride > scalar_size) {
1213 nir_ssa_def *comps[4] = { NULL, };
1214 for (unsigned i = 0; i < intrin->num_components; i++) {
1215 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1216 vec_stride * i);
1217 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1218 addr_format, 1);
1219 }
1220 value = nir_vec(b, comps, intrin->num_components);
1221 } else {
1222 value = build_explicit_io_load(b, intrin, addr, addr_format,
1223 intrin->num_components);
1224 }
1225 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1226 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
1227 assert(intrin->src[1].is_ssa);
1228 nir_ssa_def *value = intrin->src[1].ssa;
1229 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1230 if (vec_stride > scalar_size) {
1231 for (unsigned i = 0; i < intrin->num_components; i++) {
1232 if (!(write_mask & (1 << i)))
1233 continue;
1234
1235 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1236 vec_stride * i);
1237 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1238 nir_channel(b, value, i), 1);
1239 }
1240 } else {
1241 build_explicit_io_store(b, intrin, addr, addr_format,
1242 value, write_mask);
1243 }
1244 } else {
1245 nir_ssa_def *value =
1246 build_explicit_io_atomic(b, intrin, addr, addr_format);
1247 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1248 }
1249
1250 nir_instr_remove(&intrin->instr);
1251 }
1252
1253 static void
1254 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
1255 nir_address_format addr_format)
1256 {
1257 /* Just delete the deref if it's not used. We can't use
1258 * nir_deref_instr_remove_if_unused here because it may remove more than
1259  * one deref, which could break our list walking since we walk the list
1260 * backwards.
1261 */
1262 assert(list_is_empty(&deref->dest.ssa.if_uses));
1263 if (list_is_empty(&deref->dest.ssa.uses)) {
1264 nir_instr_remove(&deref->instr);
1265 return;
1266 }
1267
1268 b->cursor = nir_after_instr(&deref->instr);
1269
1270 nir_ssa_def *base_addr = NULL;
1271 if (deref->deref_type != nir_deref_type_var) {
1272 assert(deref->parent.is_ssa);
1273 base_addr = deref->parent.ssa;
1274 }
1275
1276 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
1277 addr_format);
1278
1279 nir_instr_remove(&deref->instr);
1280 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
1281 }
1282
1283 static void
1284 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
1285 nir_address_format addr_format)
1286 {
1287 assert(intrin->src[0].is_ssa);
1288 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
1289 }
1290
1291 static void
1292 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1293 nir_address_format addr_format)
1294 {
1295 b->cursor = nir_after_instr(&intrin->instr);
1296
1297 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1298
1299 assert(glsl_type_is_array(deref->type));
1300 assert(glsl_get_length(deref->type) == 0);
1301 unsigned stride = glsl_get_explicit_stride(deref->type);
1302 assert(stride > 0);
1303
1304 assert(addr_format == nir_address_format_32bit_index_offset ||
1305 addr_format == nir_address_format_vec2_index_32bit_offset);
1306 nir_ssa_def *addr = &deref->dest.ssa;
1307 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1308 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1309
1310 nir_intrinsic_instr *bsize =
1311 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
1312 bsize->src[0] = nir_src_for_ssa(index);
1313 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1314 nir_builder_instr_insert(b, &bsize->instr);
1315
1316 nir_ssa_def *arr_size =
1317 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
1318 nir_imm_int(b, stride));
1319
1320 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
1321 nir_instr_remove(&intrin->instr);
1322 }
1323
1324 static bool
1325 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
1326 nir_address_format addr_format)
1327 {
1328 bool progress = false;
1329
1330 nir_builder b;
1331 nir_builder_init(&b, impl);
1332
1333 /* Walk in reverse order so that we can see the full deref chain when we
1334 * lower the access operations. We lower them assuming that the derefs
1335 * will be turned into address calculations later.
1336 */
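   /*
    * Concretely: for a chain like  s.field[i] , the access intrinsic is
    * lowered first and keeps using the deref's SSA value as its address;
    * when the array and struct derefs are lowered afterwards,
    * nir_ssa_def_rewrite_uses() swaps that value for the address computed by
    * nir_explicit_io_address_from_deref().
    */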
1337 nir_foreach_block_reverse(block, impl) {
1338 nir_foreach_instr_reverse_safe(instr, block) {
1339 switch (instr->type) {
1340 case nir_instr_type_deref: {
1341 nir_deref_instr *deref = nir_instr_as_deref(instr);
1342 if (deref->mode & modes) {
1343 lower_explicit_io_deref(&b, deref, addr_format);
1344 progress = true;
1345 }
1346 break;
1347 }
1348
1349 case nir_instr_type_intrinsic: {
1350 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1351 switch (intrin->intrinsic) {
1352 case nir_intrinsic_load_deref:
1353 case nir_intrinsic_store_deref:
1354 case nir_intrinsic_deref_atomic_add:
1355 case nir_intrinsic_deref_atomic_imin:
1356 case nir_intrinsic_deref_atomic_umin:
1357 case nir_intrinsic_deref_atomic_imax:
1358 case nir_intrinsic_deref_atomic_umax:
1359 case nir_intrinsic_deref_atomic_and:
1360 case nir_intrinsic_deref_atomic_or:
1361 case nir_intrinsic_deref_atomic_xor:
1362 case nir_intrinsic_deref_atomic_exchange:
1363 case nir_intrinsic_deref_atomic_comp_swap:
1364 case nir_intrinsic_deref_atomic_fadd:
1365 case nir_intrinsic_deref_atomic_fmin:
1366 case nir_intrinsic_deref_atomic_fmax:
1367 case nir_intrinsic_deref_atomic_fcomp_swap: {
1368 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1369 if (deref->mode & modes) {
1370 lower_explicit_io_access(&b, intrin, addr_format);
1371 progress = true;
1372 }
1373 break;
1374 }
1375
1376 case nir_intrinsic_deref_buffer_array_length: {
1377 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1378 if (deref->mode & modes) {
1379 lower_explicit_io_array_length(&b, intrin, addr_format);
1380 progress = true;
1381 }
1382 break;
1383 }
1384
1385 default:
1386 break;
1387 }
1388 break;
1389 }
1390
1391 default:
1392 /* Nothing to do */
1393 break;
1394 }
1395 }
1396 }
1397
1398 if (progress) {
1399 nir_metadata_preserve(impl, nir_metadata_block_index |
1400 nir_metadata_dominance);
1401 }
1402
1403 return progress;
1404 }
1405
1406 bool
1407 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1408 nir_address_format addr_format)
1409 {
1410 bool progress = false;
1411
1412 nir_foreach_function(function, shader) {
1413 if (function->impl &&
1414 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1415 progress = true;
1416 }
1417
1418 return progress;
1419 }
1420
1421 static bool
1422 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
1423 nir_variable_mode modes,
1424 glsl_type_size_align_func type_info)
1425 {
1426 bool progress = false;
1427
1428 nir_foreach_block(block, impl) {
1429 nir_foreach_instr(instr, block) {
1430 if (instr->type != nir_instr_type_deref)
1431 continue;
1432
1433 nir_deref_instr *deref = nir_instr_as_deref(instr);
1434 if (!(deref->mode & modes))
1435 continue;
1436
1437 unsigned size, alignment;
1438 const struct glsl_type *new_type =
1439 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
1440 if (new_type != deref->type) {
1441 progress = true;
1442 deref->type = new_type;
1443 }
1444 if (deref->deref_type == nir_deref_type_cast) {
1445 /* See also glsl_type::get_explicit_type_for_size_align() */
1446 unsigned new_stride = align(size, alignment);
1447 if (new_stride != deref->cast.ptr_stride) {
1448 deref->cast.ptr_stride = new_stride;
1449 progress = true;
1450 }
1451 }
1452 }
1453 }
1454
1455 if (progress) {
1456 nir_metadata_preserve(impl, nir_metadata_block_index |
1457 nir_metadata_dominance |
1458 nir_metadata_live_ssa_defs |
1459 nir_metadata_loop_analysis);
1460 }
1461
1462 return progress;
1463 }
1464
1465 static bool
1466 lower_vars_to_explicit(nir_shader *shader,
1467 struct exec_list *vars, nir_variable_mode mode,
1468 glsl_type_size_align_func type_info)
1469 {
1470 bool progress = false;
1471 unsigned offset = 0;
1472 nir_foreach_variable(var, vars) {
1473 unsigned size, align;
1474 const struct glsl_type *explicit_type =
1475 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
1476
1477 if (explicit_type != var->type) {
1478 progress = true;
1479 var->type = explicit_type;
1480 }
1481
1482 var->data.driver_location = ALIGN_POT(offset, align);
1483 offset = var->data.driver_location + size;
1484 }
1485
1486 if (mode == nir_var_mem_shared) {
1487 shader->info.cs.shared_size = offset;
1488 shader->num_shared = offset;
1489 }
1490
1491 return progress;
1492 }
1493
1494 bool
1495 nir_lower_vars_to_explicit_types(nir_shader *shader,
1496 nir_variable_mode modes,
1497 glsl_type_size_align_func type_info)
1498 {
1499 /* TODO: Situations which need to be handled to support more modes:
1500 * - row-major matrices
1501 * - compact shader inputs/outputs
1502 * - interface types
1503 */
1504 ASSERTED nir_variable_mode supported = nir_var_mem_shared |
1505 nir_var_shader_temp | nir_var_function_temp;
1506 assert(!(modes & ~supported) && "unsupported");
1507
1508 bool progress = false;
1509
1510 if (modes & nir_var_mem_shared)
1511 progress |= lower_vars_to_explicit(shader, &shader->shared, nir_var_mem_shared, type_info);
1512 if (modes & nir_var_shader_temp)
1513 progress |= lower_vars_to_explicit(shader, &shader->globals, nir_var_shader_temp, type_info);
1514
1515 nir_foreach_function(function, shader) {
1516 if (function->impl) {
1517 if (modes & nir_var_function_temp)
1518 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
1519
1520 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
1521 }
1522 }
1523
1524 return progress;
1525 }
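/*
 * A typical shared-memory lowering sequence (a sketch; shared_type_info
 * stands for a driver-provided glsl_type_size_align_func and is not defined
 * in this file):
 *
 *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
 *                                     shared_type_info);
 *    nir_lower_explicit_io(shader, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 */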
1526
1527 /**
1528 * Return the offset source for a load/store intrinsic.
1529 */
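/* E.g. nir_get_io_offset_src() on a load_ubo yields &src[1] (src[0] is the
 * buffer index), while on a store_ssbo it yields &src[2] (value, buffer
 * index, offset).
 */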
1530 nir_src *
1531 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1532 {
1533 switch (instr->intrinsic) {
1534 case nir_intrinsic_load_input:
1535 case nir_intrinsic_load_output:
1536 case nir_intrinsic_load_shared:
1537 case nir_intrinsic_load_uniform:
1538 case nir_intrinsic_load_global:
1539 case nir_intrinsic_load_scratch:
1540 case nir_intrinsic_load_fs_input_interp_deltas:
1541 return &instr->src[0];
1542 case nir_intrinsic_load_ubo:
1543 case nir_intrinsic_load_ssbo:
1544 case nir_intrinsic_load_per_vertex_input:
1545 case nir_intrinsic_load_per_vertex_output:
1546 case nir_intrinsic_load_interpolated_input:
1547 case nir_intrinsic_store_output:
1548 case nir_intrinsic_store_shared:
1549 case nir_intrinsic_store_global:
1550 case nir_intrinsic_store_scratch:
1551 case nir_intrinsic_ssbo_atomic_add:
1552 case nir_intrinsic_ssbo_atomic_imin:
1553 case nir_intrinsic_ssbo_atomic_umin:
1554 case nir_intrinsic_ssbo_atomic_imax:
1555 case nir_intrinsic_ssbo_atomic_umax:
1556 case nir_intrinsic_ssbo_atomic_and:
1557 case nir_intrinsic_ssbo_atomic_or:
1558 case nir_intrinsic_ssbo_atomic_xor:
1559 case nir_intrinsic_ssbo_atomic_exchange:
1560 case nir_intrinsic_ssbo_atomic_comp_swap:
1561 case nir_intrinsic_ssbo_atomic_fadd:
1562 case nir_intrinsic_ssbo_atomic_fmin:
1563 case nir_intrinsic_ssbo_atomic_fmax:
1564 case nir_intrinsic_ssbo_atomic_fcomp_swap:
1565 return &instr->src[1];
1566 case nir_intrinsic_store_ssbo:
1567 case nir_intrinsic_store_per_vertex_output:
1568 return &instr->src[2];
1569 default:
1570 return NULL;
1571 }
1572 }
1573
1574 /**
1575 * Return the vertex index source for a load/store per_vertex intrinsic.
1576 */
1577 nir_src *
1578 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1579 {
1580 switch (instr->intrinsic) {
1581 case nir_intrinsic_load_per_vertex_input:
1582 case nir_intrinsic_load_per_vertex_output:
1583 return &instr->src[0];
1584 case nir_intrinsic_store_per_vertex_output:
1585 return &instr->src[1];
1586 default:
1587 return NULL;
1588 }
1589 }
1590
1591 /**
1592  * Return the numeric constant that identifies a NULL pointer for each
1593  * address format.
1594 */
1595 const nir_const_value *
1596 nir_address_format_null_value(nir_address_format addr_format)
1597 {
1598 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
1599 [nir_address_format_32bit_global] = {{0}},
1600 [nir_address_format_64bit_global] = {{0}},
1601 [nir_address_format_64bit_bounded_global] = {{0}},
1602 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
1603 [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
1604 [nir_address_format_32bit_offset] = {{.u32 = ~0}},
1605 [nir_address_format_logical] = {{.u32 = ~0}},
1606 };
1607
1608 assert(addr_format < ARRAY_SIZE(null_values));
1609 return null_values[addr_format];
1610 }
1611
1612 nir_ssa_def *
1613 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1614 nir_address_format addr_format)
1615 {
1616 switch (addr_format) {
1617 case nir_address_format_32bit_global:
1618 case nir_address_format_64bit_global:
1619 case nir_address_format_64bit_bounded_global:
1620 case nir_address_format_32bit_index_offset:
1621 case nir_address_format_vec2_index_32bit_offset:
1622 case nir_address_format_32bit_offset:
1623 return nir_ball_iequal(b, addr0, addr1);
1624
1625 case nir_address_format_logical:
1626 unreachable("Unsupported address format");
1627 }
1628
1629 unreachable("Invalid address format");
1630 }
1631
1632 nir_ssa_def *
1633 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1634 nir_address_format addr_format)
1635 {
1636 switch (addr_format) {
1637 case nir_address_format_32bit_global:
1638 case nir_address_format_64bit_global:
1639 case nir_address_format_32bit_offset:
1640 assert(addr0->num_components == 1);
1641 assert(addr1->num_components == 1);
1642 return nir_isub(b, addr0, addr1);
1643
1644 case nir_address_format_64bit_bounded_global:
1645 return nir_isub(b, addr_to_global(b, addr0, addr_format),
1646 addr_to_global(b, addr1, addr_format));
1647
1648 case nir_address_format_32bit_index_offset:
1649 assert(addr0->num_components == 2);
1650 assert(addr1->num_components == 2);
1651 /* Assume the same buffer index. */
1652 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
1653
1654 case nir_address_format_vec2_index_32bit_offset:
1655 assert(addr0->num_components == 3);
1656 assert(addr1->num_components == 3);
1657 /* Assume the same buffer index. */
1658 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
1659
1660 case nir_address_format_logical:
1661 unreachable("Unsupported address format");
1662 }
1663
1664 unreachable("Invalid address format");
1665 }
1666
1667 static bool
1668 is_input(nir_intrinsic_instr *intrin)
1669 {
1670 return intrin->intrinsic == nir_intrinsic_load_input ||
1671 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
1672 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
1673 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
1674 }
1675
1676 static bool
1677 is_output(nir_intrinsic_instr *intrin)
1678 {
1679 return intrin->intrinsic == nir_intrinsic_load_output ||
1680 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
1681 intrin->intrinsic == nir_intrinsic_store_output ||
1682 intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
1683 }
1684
1685
1686 /**
1687 * This pass adds constant offsets to instr->const_index[0] for input/output
1688 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
1689 * unchanged - since we don't know what part of a compound variable is
1690 * accessed, we allocate storage for the entire thing. For drivers that use
1691 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
1692 * the offset source will be 0, so that they don't have to add it in manually.
1693 */
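/*
 * For example, a load_input with base == 1 and a constant offset source of 2
 * becomes base == 3 with the offset source rewritten to 0; a load whose
 * offset is a non-constant SSA value is left untouched.
 */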
1694
1695 static bool
1696 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
1697 nir_variable_mode mode)
1698 {
1699 bool progress = false;
1700 nir_foreach_instr_safe(instr, block) {
1701 if (instr->type != nir_instr_type_intrinsic)
1702 continue;
1703
1704 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1705
1706 if ((mode == nir_var_shader_in && is_input(intrin)) ||
1707 (mode == nir_var_shader_out && is_output(intrin))) {
1708 nir_src *offset = nir_get_io_offset_src(intrin);
1709
1710 if (nir_src_is_const(*offset)) {
1711 intrin->const_index[0] += nir_src_as_uint(*offset);
1712 b->cursor = nir_before_instr(&intrin->instr);
1713 nir_instr_rewrite_src(&intrin->instr, offset,
1714 nir_src_for_ssa(nir_imm_int(b, 0)));
1715 progress = true;
1716 }
1717 }
1718 }
1719
1720 return progress;
1721 }
1722
1723 bool
1724 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
1725 {
1726 bool progress = false;
1727
1728 nir_foreach_function(f, nir) {
1729 if (f->impl) {
1730 nir_builder b;
1731 nir_builder_init(&b, f->impl);
1732 nir_foreach_block(block, f->impl) {
1733 progress |= add_const_offset_to_base_block(block, &b, mode);
1734 }
1735 }
1736 }
1737
1738 return progress;
1739 }
1740