nir: Move nir_lower_mediump_outputs from ir3
[mesa.git] / src / compiler / nir / nir_lower_io.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts references to input/output variables with
31 * loads/stores to actual input/output intrinsics.
32 */
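/*
 * Schematically (illustrative, not exact NIR print syntax): a vertex-shader
 * read of an input variable
 *
 *    ssa_1 = deref_var &in_color (shader_in vec4)
 *    ssa_2 = load_deref ssa_1
 *
 * becomes, after nir_lower_io(..., nir_var_shader_in, ...),
 *
 *    ssa_2 = load_input (offset = 0), base = in_color.driver_location,
 *                                     component = in_color.location_frac
 *
 * with the driver's type_size callback deciding how variables map to bases.
 */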
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37
38 #include "util/u_math.h"
39
40 struct lower_io_state {
41 void *dead_ctx;
42 nir_builder builder;
43 int (*type_size)(const struct glsl_type *type, bool);
44 nir_variable_mode modes;
45 nir_lower_io_options options;
46 };
47
48 static nir_intrinsic_op
49 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
50 {
51 switch (deref_op) {
52 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
53 OP(atomic_exchange)
54 OP(atomic_comp_swap)
55 OP(atomic_add)
56 OP(atomic_imin)
57 OP(atomic_umin)
58 OP(atomic_imax)
59 OP(atomic_umax)
60 OP(atomic_and)
61 OP(atomic_or)
62 OP(atomic_xor)
63 OP(atomic_fadd)
64 OP(atomic_fmin)
65 OP(atomic_fmax)
66 OP(atomic_fcomp_swap)
67 #undef OP
68 default:
69 unreachable("Invalid SSBO atomic");
70 }
71 }
72
73 static nir_intrinsic_op
74 global_atomic_for_deref(nir_intrinsic_op deref_op)
75 {
76 switch (deref_op) {
77 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
78 OP(atomic_exchange)
79 OP(atomic_comp_swap)
80 OP(atomic_add)
81 OP(atomic_imin)
82 OP(atomic_umin)
83 OP(atomic_imax)
84 OP(atomic_umax)
85 OP(atomic_and)
86 OP(atomic_or)
87 OP(atomic_xor)
88 OP(atomic_fadd)
89 OP(atomic_fmin)
90 OP(atomic_fmax)
91 OP(atomic_fcomp_swap)
92 #undef OP
93 default:
94       unreachable("Invalid global atomic");
95 }
96 }
97
98 static nir_intrinsic_op
99 shared_atomic_for_deref(nir_intrinsic_op deref_op)
100 {
101 switch (deref_op) {
102 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
103 OP(atomic_exchange)
104 OP(atomic_comp_swap)
105 OP(atomic_add)
106 OP(atomic_imin)
107 OP(atomic_umin)
108 OP(atomic_imax)
109 OP(atomic_umax)
110 OP(atomic_and)
111 OP(atomic_or)
112 OP(atomic_xor)
113 OP(atomic_fadd)
114 OP(atomic_fmin)
115 OP(atomic_fmax)
116 OP(atomic_fcomp_swap)
117 #undef OP
118 default:
119 unreachable("Invalid shared atomic");
120 }
121 }
122
123 void
124 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
125 int (*type_size)(const struct glsl_type *, bool))
126 {
127 unsigned location = 0;
128
129 nir_foreach_variable(var, var_list) {
130 /*
131        * UBOs and SSBOs have their own address spaces, so don't count them
132        * towards the number of global uniforms.
133 */
134 if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo)
135 continue;
136
137 var->data.driver_location = location;
138 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
139 var->data.mode == nir_var_shader_out ||
140 var->data.bindless;
141 location += type_size(var->type, bindless_type_size);
142 }
143
144 *size = location;
145 }
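
/* Illustrative use (any driver-provided type_size callback works the same
 * way; shown here on the shader's shared-variable list):
 *
 *    nir_assign_var_locations(&shader->shared, &shader->num_shared,
 *                             driver_shared_type_size);
 */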
146
147 /**
148  * Return true if the given variable is a per-vertex input/output array
149  * (such as geometry shader inputs).
150 */
151 bool
152 nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
153 {
154 if (var->data.patch || !glsl_type_is_array(var->type))
155 return false;
156
157 if (var->data.mode == nir_var_shader_in)
158 return stage == MESA_SHADER_GEOMETRY ||
159 stage == MESA_SHADER_TESS_CTRL ||
160 stage == MESA_SHADER_TESS_EVAL;
161
162 if (var->data.mode == nir_var_shader_out)
163 return stage == MESA_SHADER_TESS_CTRL;
164
165 return false;
166 }
167
168 static nir_ssa_def *
169 get_io_offset(nir_builder *b, nir_deref_instr *deref,
170 nir_ssa_def **vertex_index,
171 int (*type_size)(const struct glsl_type *, bool),
172 unsigned *component, bool bts)
173 {
174 nir_deref_path path;
175 nir_deref_path_init(&path, deref, NULL);
176
177 assert(path.path[0]->deref_type == nir_deref_type_var);
178 nir_deref_instr **p = &path.path[1];
179
180    /* For per-vertex input arrays (e.g. geometry shader inputs), keep the
181 * outermost array index separate. Process the rest normally.
182 */
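   /* For example (illustrative): for a geometry-shader read of
    * gl_in[vertex].gl_Position, "vertex" is returned via *vertex_index and
    * only the remaining .gl_Position access feeds the offset computed below.
    */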
183 if (vertex_index != NULL) {
184 assert((*p)->deref_type == nir_deref_type_array);
185 *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
186 p++;
187 }
188
189 if (path.path[0]->var->data.compact) {
190 assert((*p)->deref_type == nir_deref_type_array);
191 assert(glsl_type_is_scalar((*p)->type));
192
193 /* We always lower indirect dereferences for "compact" array vars. */
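      /* Worked example (illustrative): for a compact float array such as
       * gl_ClipDistance accessed at index 5 with *component == 0,
       * total_offset == 5, so slot_offset == 1 and *component becomes 1,
       * i.e. the y channel of the second vec4 slot.
       */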
194 const unsigned index = nir_src_as_uint((*p)->arr.index);
195 const unsigned total_offset = *component + index;
196 const unsigned slot_offset = total_offset / 4;
197 *component = total_offset % 4;
198 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
199 }
200
201 /* Just emit code and let constant-folding go to town */
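   /* Illustratively, a chain like foo[i].bar (with bar the second struct
    * field) accumulates offset = i * type_size(element type) +
    * type_size(first field), all in the units of the driver's type_size
    * callback.
    */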
202 nir_ssa_def *offset = nir_imm_int(b, 0);
203
204 for (; *p; p++) {
205 if ((*p)->deref_type == nir_deref_type_array) {
206 unsigned size = type_size((*p)->type, bts);
207
208 nir_ssa_def *mul =
209 nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
210
211 offset = nir_iadd(b, offset, mul);
212 } else if ((*p)->deref_type == nir_deref_type_struct) {
213 /* p starts at path[1], so this is safe */
214 nir_deref_instr *parent = *(p - 1);
215
216 unsigned field_offset = 0;
217 for (unsigned i = 0; i < (*p)->strct.index; i++) {
218 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
219 }
220 offset = nir_iadd_imm(b, offset, field_offset);
221 } else {
222 unreachable("Unsupported deref type");
223 }
224 }
225
226 nir_deref_path_finish(&path);
227
228 return offset;
229 }
230
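/* Emits the lowered load intrinsic for a single access: the opcode is chosen
 * from the variable mode (and, for fragment inputs, the interpolation mode),
 * the vertex-index / barycentric / offset sources are wired up, and the
 * base, component, range and type indices are set where applicable.
 */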
231 static nir_ssa_def *
232 emit_load(struct lower_io_state *state,
233 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
234 unsigned component, unsigned num_components, unsigned bit_size,
235 nir_alu_type type)
236 {
237 nir_builder *b = &state->builder;
238 const nir_shader *nir = b->shader;
239 nir_variable_mode mode = var->data.mode;
240 nir_ssa_def *barycentric = NULL;
241
242 nir_intrinsic_op op;
243 switch (mode) {
244 case nir_var_shader_in:
245 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
246 nir->options->use_interpolated_input_intrinsics &&
247 var->data.interpolation != INTERP_MODE_FLAT) {
248 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
249 assert(vertex_index != NULL);
250 op = nir_intrinsic_load_input_vertex;
251 } else {
252 assert(vertex_index == NULL);
253
254 nir_intrinsic_op bary_op;
255 if (var->data.sample ||
256 (state->options & nir_lower_io_force_sample_interpolation))
257 bary_op = nir_intrinsic_load_barycentric_sample;
258 else if (var->data.centroid)
259 bary_op = nir_intrinsic_load_barycentric_centroid;
260 else
261 bary_op = nir_intrinsic_load_barycentric_pixel;
262
263 barycentric = nir_load_barycentric(&state->builder, bary_op,
264 var->data.interpolation);
265 op = nir_intrinsic_load_interpolated_input;
266 }
267 } else {
268 op = vertex_index ? nir_intrinsic_load_per_vertex_input :
269 nir_intrinsic_load_input;
270 }
271 break;
272 case nir_var_shader_out:
273 op = vertex_index ? nir_intrinsic_load_per_vertex_output :
274 nir_intrinsic_load_output;
275 break;
276 case nir_var_uniform:
277 op = nir_intrinsic_load_uniform;
278 break;
279 case nir_var_mem_shared:
280 op = nir_intrinsic_load_shared;
281 break;
282 default:
283 unreachable("Unknown variable mode");
284 }
285
286 nir_intrinsic_instr *load =
287 nir_intrinsic_instr_create(state->builder.shader, op);
288 load->num_components = num_components;
289
290 nir_intrinsic_set_base(load, var->data.driver_location);
291 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
292 nir_intrinsic_set_component(load, component);
293
294 if (load->intrinsic == nir_intrinsic_load_uniform)
295 nir_intrinsic_set_range(load,
296 state->type_size(var->type, var->data.bindless));
297
298 if (load->intrinsic == nir_intrinsic_load_input ||
299 load->intrinsic == nir_intrinsic_load_input_vertex ||
300 load->intrinsic == nir_intrinsic_load_uniform)
301 nir_intrinsic_set_type(load, type);
302
303 if (vertex_index) {
304 load->src[0] = nir_src_for_ssa(vertex_index);
305 load->src[1] = nir_src_for_ssa(offset);
306 } else if (barycentric) {
307 load->src[0] = nir_src_for_ssa(barycentric);
308 load->src[1] = nir_src_for_ssa(offset);
309 } else {
310 load->src[0] = nir_src_for_ssa(offset);
311 }
312
313 nir_ssa_dest_init(&load->instr, &load->dest,
314 num_components, bit_size, NULL);
315 nir_builder_instr_insert(b, &load->instr);
316
317 return &load->dest.ssa;
318 }
319
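/* Wrapper around emit_load.  With nir_lower_io_lower_64bit_to_32 set, 64-bit
 * loads are split into 32-bit loads, one vec4 slot at a time, and re-packed
 * with nir_pack_64_2x32; e.g. (illustrative) a dvec3 at component 0 becomes
 * a 4-component 32-bit load in the first slot plus a 2-component 32-bit load
 * in the next.  1-bit booleans are loaded as 32-bit and narrowed with
 * nir_b2b1.
 */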
320 static nir_ssa_def *
321 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
322 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
323 unsigned component, const struct glsl_type *type)
324 {
325 assert(intrin->dest.is_ssa);
326 if (intrin->dest.ssa.bit_size == 64 &&
327 (state->options & nir_lower_io_lower_64bit_to_32)) {
328 nir_builder *b = &state->builder;
329
330 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
331
332 nir_ssa_def *comp64[4];
333 assert(component == 0 || component == 2);
334 unsigned dest_comp = 0;
335 while (dest_comp < intrin->dest.ssa.num_components) {
336 const unsigned num_comps =
337 MIN2(intrin->dest.ssa.num_components - dest_comp,
338 (4 - component) / 2);
339
340 nir_ssa_def *data32 =
341 emit_load(state, vertex_index, var, offset, component,
342 num_comps * 2, 32, nir_type_uint32);
343 for (unsigned i = 0; i < num_comps; i++) {
344 comp64[dest_comp + i] =
345 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
346 }
347
348          /* Only the first load has a component offset */
349 component = 0;
350 dest_comp += num_comps;
351 offset = nir_iadd_imm(b, offset, slot_size);
352 }
353
354 return nir_vec(b, comp64, intrin->dest.ssa.num_components);
355 } else if (intrin->dest.ssa.bit_size == 1) {
356 /* Booleans are 32-bit */
357 assert(glsl_type_is_boolean(type));
358 return nir_b2b1(&state->builder,
359 emit_load(state, vertex_index, var, offset, component,
360 intrin->dest.ssa.num_components, 32,
361 nir_type_bool32));
362 } else {
363 return emit_load(state, vertex_index, var, offset, component,
364 intrin->dest.ssa.num_components,
365 intrin->dest.ssa.bit_size,
366 nir_get_nir_type_for_glsl_type(type));
367 }
368 }
369
370 static void
371 emit_store(struct lower_io_state *state, nir_ssa_def *data,
372 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
373 unsigned component, unsigned num_components,
374 nir_component_mask_t write_mask, nir_alu_type type)
375 {
376 nir_builder *b = &state->builder;
377 nir_variable_mode mode = var->data.mode;
378
379 nir_intrinsic_op op;
380 if (mode == nir_var_mem_shared) {
381 op = nir_intrinsic_store_shared;
382 } else {
383 assert(mode == nir_var_shader_out);
384 op = vertex_index ? nir_intrinsic_store_per_vertex_output :
385 nir_intrinsic_store_output;
386 }
387
388 nir_intrinsic_instr *store =
389 nir_intrinsic_instr_create(state->builder.shader, op);
390 store->num_components = num_components;
391
392 store->src[0] = nir_src_for_ssa(data);
393
394 nir_intrinsic_set_base(store, var->data.driver_location);
395
396 if (mode == nir_var_shader_out)
397 nir_intrinsic_set_component(store, component);
398
399 if (store->intrinsic == nir_intrinsic_store_output)
400 nir_intrinsic_set_type(store, type);
401
402 nir_intrinsic_set_write_mask(store, write_mask);
403
404 if (vertex_index)
405 store->src[1] = nir_src_for_ssa(vertex_index);
406
407 store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
408
409 nir_builder_instr_insert(b, &store->instr);
410 }
411
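/* Store-side counterpart of lower_load.  When 64-bit stores are lowered to
 * 32-bit, each 64-bit component's write-mask bit expands to two adjacent
 * 32-bit bits; e.g. (illustrative) writing only .y of a dvec2 turns
 * write_mask 0x2 into 0xc on the emitted 32-bit store.
 */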
412 static void
413 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
414 nir_ssa_def *vertex_index, nir_variable *var, nir_ssa_def *offset,
415 unsigned component, const struct glsl_type *type)
416 {
417 assert(intrin->src[1].is_ssa);
418 if (intrin->src[1].ssa->bit_size == 64 &&
419 (state->options & nir_lower_io_lower_64bit_to_32)) {
420 nir_builder *b = &state->builder;
421
422 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
423
424 assert(component == 0 || component == 2);
425 unsigned src_comp = 0;
426 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
427 while (src_comp < intrin->num_components) {
428 const unsigned num_comps =
429 MIN2(intrin->num_components - src_comp,
430 (4 - component) / 2);
431
432 if (write_mask & BITFIELD_MASK(num_comps)) {
433 nir_ssa_def *data =
434 nir_channels(b, intrin->src[1].ssa,
435 BITFIELD_RANGE(src_comp, num_comps));
436 nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
437
438 nir_component_mask_t write_mask32 = 0;
439 for (unsigned i = 0; i < num_comps; i++) {
440 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
441 write_mask32 |= 3 << (i * 2);
442 }
443
444 emit_store(state, data32, vertex_index, var, offset,
445 component, data32->num_components, write_mask32,
446 nir_type_uint32);
447 }
448
449 /* Only the first store has a component offset */
450 component = 0;
451 src_comp += num_comps;
452 write_mask >>= num_comps;
453 offset = nir_iadd_imm(b, offset, slot_size);
454 }
455    } else if (intrin->src[1].ssa->bit_size == 1) {
456 /* Booleans are 32-bit */
457 assert(glsl_type_is_boolean(type));
458 nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
459 emit_store(state, b32_val, vertex_index, var, offset,
460 component, intrin->num_components,
461 nir_intrinsic_write_mask(intrin),
462 nir_type_bool32);
463 } else {
464 emit_store(state, intrin->src[1].ssa, vertex_index, var, offset,
465 component, intrin->num_components,
466 nir_intrinsic_write_mask(intrin),
467 nir_get_nir_type_for_glsl_type(type));
468 }
469 }
470
471 static nir_ssa_def *
472 lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
473 nir_variable *var, nir_ssa_def *offset)
474 {
475 nir_builder *b = &state->builder;
476 assert(var->data.mode == nir_var_mem_shared);
477
478 nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic);
479
480 nir_intrinsic_instr *atomic =
481 nir_intrinsic_instr_create(state->builder.shader, op);
482
483 nir_intrinsic_set_base(atomic, var->data.driver_location);
484
485 atomic->src[0] = nir_src_for_ssa(offset);
486 assert(nir_intrinsic_infos[intrin->intrinsic].num_srcs ==
487 nir_intrinsic_infos[op].num_srcs);
488 for (unsigned i = 1; i < nir_intrinsic_infos[op].num_srcs; i++) {
489 nir_src_copy(&atomic->src[i], &intrin->src[i], atomic);
490 }
491
492 if (nir_intrinsic_infos[op].has_dest) {
493 assert(intrin->dest.is_ssa);
494 assert(nir_intrinsic_infos[intrin->intrinsic].has_dest);
495 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
496 intrin->dest.ssa.num_components,
497 intrin->dest.ssa.bit_size, NULL);
498 }
499
500 nir_builder_instr_insert(b, &atomic->instr);
501
502 return nir_intrinsic_infos[op].has_dest ? &atomic->dest.ssa : NULL;
503 }
504
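/* Lowers interpolateAtCentroid/Sample/Offset (and interpolateAtVertex) on an
 * input deref.  Flat and explicit inputs reduce to a plain load; otherwise
 * the matching load_barycentric_* intrinsic is emitted and fed into
 * load_interpolated_input.
 */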
505 static nir_ssa_def *
506 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
507 nir_variable *var, nir_ssa_def *offset, unsigned component,
508 const struct glsl_type *type)
509 {
510 nir_builder *b = &state->builder;
511 assert(var->data.mode == nir_var_shader_in);
512
513 /* Ignore interpolateAt() for flat variables - flat is flat. Lower
514 * interpolateAtVertex() for explicit variables.
515 */
516 if (var->data.interpolation == INTERP_MODE_FLAT ||
517 var->data.interpolation == INTERP_MODE_EXPLICIT) {
518 nir_ssa_def *vertex_index = NULL;
519
520 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
521 assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
522 vertex_index = intrin->src[1].ssa;
523 }
524
525 return lower_load(intrin, state, vertex_index, var, offset, component, type);
526 }
527
528 /* None of the supported APIs allow interpolation on 64-bit things */
529 assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
530
531 nir_intrinsic_op bary_op;
532 switch (intrin->intrinsic) {
533 case nir_intrinsic_interp_deref_at_centroid:
534 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
535 nir_intrinsic_load_barycentric_sample :
536 nir_intrinsic_load_barycentric_centroid;
537 break;
538 case nir_intrinsic_interp_deref_at_sample:
539 bary_op = nir_intrinsic_load_barycentric_at_sample;
540 break;
541 case nir_intrinsic_interp_deref_at_offset:
542 bary_op = nir_intrinsic_load_barycentric_at_offset;
543 break;
544 default:
545 unreachable("Bogus interpolateAt() intrinsic.");
546 }
547
548 nir_intrinsic_instr *bary_setup =
549 nir_intrinsic_instr_create(state->builder.shader, bary_op);
550
551 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
552 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
553
554 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
555 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
556 intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
557 nir_src_copy(&bary_setup->src[0], &intrin->src[1], bary_setup);
558
559 nir_builder_instr_insert(b, &bary_setup->instr);
560
561 nir_intrinsic_instr *load =
562 nir_intrinsic_instr_create(state->builder.shader,
563 nir_intrinsic_load_interpolated_input);
564 load->num_components = intrin->num_components;
565
566 nir_intrinsic_set_base(load, var->data.driver_location);
567 nir_intrinsic_set_component(load, component);
568
569 load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
570 load->src[1] = nir_src_for_ssa(offset);
571
572 assert(intrin->dest.is_ssa);
573 nir_ssa_dest_init(&load->instr, &load->dest,
574 intrin->dest.ssa.num_components,
575 intrin->dest.ssa.bit_size, NULL);
576 nir_builder_instr_insert(b, &load->instr);
577
578 return &load->dest.ssa;
579 }
580
581 static bool
582 nir_lower_io_block(nir_block *block,
583 struct lower_io_state *state)
584 {
585 nir_builder *b = &state->builder;
586 const nir_shader_compiler_options *options = b->shader->options;
587 bool progress = false;
588
589 nir_foreach_instr_safe(instr, block) {
590 if (instr->type != nir_instr_type_intrinsic)
591 continue;
592
593 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
594
595 switch (intrin->intrinsic) {
596 case nir_intrinsic_load_deref:
597 case nir_intrinsic_store_deref:
598 case nir_intrinsic_deref_atomic_add:
599 case nir_intrinsic_deref_atomic_imin:
600 case nir_intrinsic_deref_atomic_umin:
601 case nir_intrinsic_deref_atomic_imax:
602 case nir_intrinsic_deref_atomic_umax:
603 case nir_intrinsic_deref_atomic_and:
604 case nir_intrinsic_deref_atomic_or:
605 case nir_intrinsic_deref_atomic_xor:
606 case nir_intrinsic_deref_atomic_exchange:
607 case nir_intrinsic_deref_atomic_comp_swap:
608 case nir_intrinsic_deref_atomic_fadd:
609 case nir_intrinsic_deref_atomic_fmin:
610 case nir_intrinsic_deref_atomic_fmax:
611 case nir_intrinsic_deref_atomic_fcomp_swap:
612          /* We can lower the io for this nir intrinsic */
613 break;
614 case nir_intrinsic_interp_deref_at_centroid:
615 case nir_intrinsic_interp_deref_at_sample:
616 case nir_intrinsic_interp_deref_at_offset:
617 case nir_intrinsic_interp_deref_at_vertex:
618 /* We can optionally lower these to load_interpolated_input */
619 if (options->use_interpolated_input_intrinsics)
620 break;
621 default:
622          /* We can't lower the io for this nir intrinsic, so skip it */
623 continue;
624 }
625
626 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
627
628 nir_variable_mode mode = deref->mode;
629
630 if ((state->modes & mode) == 0)
631 continue;
632
633 if (mode != nir_var_shader_in &&
634 mode != nir_var_shader_out &&
635 mode != nir_var_mem_shared &&
636 mode != nir_var_uniform)
637 continue;
638
639 nir_variable *var = nir_deref_instr_get_variable(deref);
640
641 b->cursor = nir_before_instr(instr);
642
643 const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
644
645 nir_ssa_def *offset;
646 nir_ssa_def *vertex_index = NULL;
647 unsigned component_offset = var->data.location_frac;
648 bool bindless_type_size = mode == nir_var_shader_in ||
649 mode == nir_var_shader_out ||
650 var->data.bindless;
651
652 offset = get_io_offset(b, deref, per_vertex ? &vertex_index : NULL,
653 state->type_size, &component_offset,
654 bindless_type_size);
655
656 nir_ssa_def *replacement = NULL;
657
658 switch (intrin->intrinsic) {
659 case nir_intrinsic_load_deref:
660 replacement = lower_load(intrin, state, vertex_index, var, offset,
661 component_offset, deref->type);
662 break;
663
664 case nir_intrinsic_store_deref:
665 lower_store(intrin, state, vertex_index, var, offset,
666 component_offset, deref->type);
667 break;
668
669 case nir_intrinsic_deref_atomic_add:
670 case nir_intrinsic_deref_atomic_imin:
671 case nir_intrinsic_deref_atomic_umin:
672 case nir_intrinsic_deref_atomic_imax:
673 case nir_intrinsic_deref_atomic_umax:
674 case nir_intrinsic_deref_atomic_and:
675 case nir_intrinsic_deref_atomic_or:
676 case nir_intrinsic_deref_atomic_xor:
677 case nir_intrinsic_deref_atomic_exchange:
678 case nir_intrinsic_deref_atomic_comp_swap:
679 case nir_intrinsic_deref_atomic_fadd:
680 case nir_intrinsic_deref_atomic_fmin:
681 case nir_intrinsic_deref_atomic_fmax:
682 case nir_intrinsic_deref_atomic_fcomp_swap:
683 assert(vertex_index == NULL);
684 replacement = lower_atomic(intrin, state, var, offset);
685 break;
686
687 case nir_intrinsic_interp_deref_at_centroid:
688 case nir_intrinsic_interp_deref_at_sample:
689 case nir_intrinsic_interp_deref_at_offset:
690 case nir_intrinsic_interp_deref_at_vertex:
691 assert(vertex_index == NULL);
692 replacement = lower_interpolate_at(intrin, state, var, offset,
693 component_offset, deref->type);
694 break;
695
696 default:
697 continue;
698 }
699
700 if (replacement) {
701 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
702 nir_src_for_ssa(replacement));
703 }
704 nir_instr_remove(&intrin->instr);
705 progress = true;
706 }
707
708 return progress;
709 }
710
711 static bool
712 nir_lower_io_impl(nir_function_impl *impl,
713 nir_variable_mode modes,
714 int (*type_size)(const struct glsl_type *, bool),
715 nir_lower_io_options options)
716 {
717 struct lower_io_state state;
718 bool progress = false;
719
720 nir_builder_init(&state.builder, impl);
721 state.dead_ctx = ralloc_context(NULL);
722 state.modes = modes;
723 state.type_size = type_size;
724 state.options = options;
725
726 nir_foreach_block(block, impl) {
727 progress |= nir_lower_io_block(block, &state);
728 }
729
730 ralloc_free(state.dead_ctx);
731
732 nir_metadata_preserve(impl, nir_metadata_block_index |
733 nir_metadata_dominance);
734 return progress;
735 }
736
737 bool
738 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
739 int (*type_size)(const struct glsl_type *, bool),
740 nir_lower_io_options options)
741 {
742 bool progress = false;
743
744 nir_foreach_function(function, shader) {
745 if (function->impl) {
746 progress |= nir_lower_io_impl(function->impl, modes,
747 type_size, options);
748 }
749 }
750
751 return progress;
752 }
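
/* Typical use (illustrative; the type_size callback is driver-specific, e.g.
 * one that counts vec4 slots):
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 driver_type_size_vec4, nir_lower_io_lower_64bit_to_32);
 */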
753
754 static unsigned
755 type_scalar_size_bytes(const struct glsl_type *type)
756 {
757 assert(glsl_type_is_vector_or_scalar(type) ||
758 glsl_type_is_matrix(type));
759 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
760 }
761
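/* Address layouts assumed by the helpers below, as implemented in this file:
 * 32bit_global, 64bit_global and 32bit_offset are a single scalar;
 * 32bit_index_offset is vec2(buffer index, byte offset); and
 * 64bit_bounded_global is vec4(base_lo, base_hi, bound, byte offset).
 */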
762 static nir_ssa_def *
763 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
764 nir_address_format addr_format, nir_ssa_def *offset)
765 {
766 assert(offset->num_components == 1);
767 assert(addr->bit_size == offset->bit_size);
768
769 switch (addr_format) {
770 case nir_address_format_32bit_global:
771 case nir_address_format_64bit_global:
772 case nir_address_format_32bit_offset:
773 assert(addr->num_components == 1);
774 return nir_iadd(b, addr, offset);
775
776 case nir_address_format_64bit_bounded_global:
777 assert(addr->num_components == 4);
778 return nir_vec4(b, nir_channel(b, addr, 0),
779 nir_channel(b, addr, 1),
780 nir_channel(b, addr, 2),
781 nir_iadd(b, nir_channel(b, addr, 3), offset));
782
783 case nir_address_format_32bit_index_offset:
784 assert(addr->num_components == 2);
785 return nir_vec2(b, nir_channel(b, addr, 0),
786 nir_iadd(b, nir_channel(b, addr, 1), offset));
787 case nir_address_format_logical:
788 unreachable("Unsupported address format");
789 }
790 unreachable("Invalid address format");
791 }
792
793 static nir_ssa_def *
794 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
795 nir_address_format addr_format, int64_t offset)
796 {
797 return build_addr_iadd(b, addr, addr_format,
798 nir_imm_intN_t(b, offset, addr->bit_size));
799 }
800
801 static nir_ssa_def *
802 addr_to_index(nir_builder *b, nir_ssa_def *addr,
803 nir_address_format addr_format)
804 {
805 assert(addr_format == nir_address_format_32bit_index_offset);
806 assert(addr->num_components == 2);
807 return nir_channel(b, addr, 0);
808 }
809
810 static nir_ssa_def *
811 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
812 nir_address_format addr_format)
813 {
814 assert(addr_format == nir_address_format_32bit_index_offset);
815 assert(addr->num_components == 2);
816 return nir_channel(b, addr, 1);
817 }
818
819 /** Returns true if the given address format resolves to a global address */
820 static bool
821 addr_format_is_global(nir_address_format addr_format)
822 {
823 return addr_format == nir_address_format_32bit_global ||
824 addr_format == nir_address_format_64bit_global ||
825 addr_format == nir_address_format_64bit_bounded_global;
826 }
827
828 static nir_ssa_def *
829 addr_to_global(nir_builder *b, nir_ssa_def *addr,
830 nir_address_format addr_format)
831 {
832 switch (addr_format) {
833 case nir_address_format_32bit_global:
834 case nir_address_format_64bit_global:
835 assert(addr->num_components == 1);
836 return addr;
837
838 case nir_address_format_64bit_bounded_global:
839 assert(addr->num_components == 4);
840 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
841 nir_u2u64(b, nir_channel(b, addr, 3)));
842
843 case nir_address_format_32bit_index_offset:
844 case nir_address_format_32bit_offset:
845 case nir_address_format_logical:
846 unreachable("Cannot get a 64-bit address with this address format");
847 }
848
849 unreachable("Invalid address format");
850 }
851
852 static bool
853 addr_format_needs_bounds_check(nir_address_format addr_format)
854 {
855 return addr_format == nir_address_format_64bit_bounded_global;
856 }
857
858 static nir_ssa_def *
859 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
860 nir_address_format addr_format, unsigned size)
861 {
862 assert(addr_format == nir_address_format_64bit_bounded_global);
863 assert(addr->num_components == 4);
864 return nir_ige(b, nir_channel(b, addr, 2),
865 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
866 }
867
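/* Emits the lowered load for an explicit-IO access.  With a bounded address
 * format the load is wrapped in an addr_is_in_bounds() check and an
 * out-of-bounds access yields zero via nir_if_phi(); 1-bit booleans are
 * loaded as 32-bit and converted back to a boolean afterwards.
 */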
868 static nir_ssa_def *
869 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
870 nir_ssa_def *addr, nir_address_format addr_format,
871 unsigned num_components)
872 {
873 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
874
875 nir_intrinsic_op op;
876 switch (mode) {
877 case nir_var_mem_ubo:
878 op = nir_intrinsic_load_ubo;
879 break;
880 case nir_var_mem_ssbo:
881 if (addr_format_is_global(addr_format))
882 op = nir_intrinsic_load_global;
883 else
884 op = nir_intrinsic_load_ssbo;
885 break;
886 case nir_var_mem_global:
887 assert(addr_format_is_global(addr_format));
888 op = nir_intrinsic_load_global;
889 break;
890 case nir_var_shader_in:
891 assert(addr_format_is_global(addr_format));
892 op = nir_intrinsic_load_kernel_input;
893 break;
894 case nir_var_mem_shared:
895 assert(addr_format == nir_address_format_32bit_offset);
896 op = nir_intrinsic_load_shared;
897 break;
898 default:
899 unreachable("Unsupported explicit IO variable mode");
900 }
901
902 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
903
904 if (addr_format_is_global(addr_format)) {
905 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
906 } else if (addr_format == nir_address_format_32bit_offset) {
907 assert(addr->num_components == 1);
908 load->src[0] = nir_src_for_ssa(addr);
909 } else {
910 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
911 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
912 }
913
914 if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared)
915 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
916
917 unsigned bit_size = intrin->dest.ssa.bit_size;
918 if (bit_size == 1) {
919 /* TODO: Make the native bool bit_size an option. */
920 bit_size = 32;
921 }
922
923 /* TODO: We should try and provide a better alignment. For OpenCL, we need
924 * to plumb the alignment through from SPIR-V when we have one.
925 */
926 nir_intrinsic_set_align(load, bit_size / 8, 0);
927
928 assert(intrin->dest.is_ssa);
929 load->num_components = num_components;
930 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
931 bit_size, intrin->dest.ssa.name);
932
933 assert(bit_size % 8 == 0);
934
935 nir_ssa_def *result;
936 if (addr_format_needs_bounds_check(addr_format)) {
937 /* The Vulkan spec for robustBufferAccess gives us quite a few options
938 * as to what we can do with an OOB read. Unfortunately, returning
939 * undefined values isn't one of them so we return an actual zero.
940 */
941 nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
942
943 const unsigned load_size = (bit_size / 8) * load->num_components;
944 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
945
946 nir_builder_instr_insert(b, &load->instr);
947
948 nir_pop_if(b, NULL);
949
950 result = nir_if_phi(b, &load->dest.ssa, zero);
951 } else {
952 nir_builder_instr_insert(b, &load->instr);
953 result = &load->dest.ssa;
954 }
955
956 if (intrin->dest.ssa.bit_size == 1) {
957 /* For shared, we can go ahead and use NIR's and/or the back-end's
958 * standard encoding for booleans rather than forcing a 0/1 boolean.
959 * This should save an instruction or two.
960 */
961 if (mode == nir_var_mem_shared)
962 result = nir_b2b1(b, result);
963 else
964 result = nir_i2b(b, result);
965 }
966
967 return result;
968 }
969
970 static void
971 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
972 nir_ssa_def *addr, nir_address_format addr_format,
973 nir_ssa_def *value, nir_component_mask_t write_mask)
974 {
975 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
976
977 nir_intrinsic_op op;
978 switch (mode) {
979 case nir_var_mem_ssbo:
980 if (addr_format_is_global(addr_format))
981 op = nir_intrinsic_store_global;
982 else
983 op = nir_intrinsic_store_ssbo;
984 break;
985 case nir_var_mem_global:
986 assert(addr_format_is_global(addr_format));
987 op = nir_intrinsic_store_global;
988 break;
989 case nir_var_mem_shared:
990 assert(addr_format == nir_address_format_32bit_offset);
991 op = nir_intrinsic_store_shared;
992 break;
993 default:
994 unreachable("Unsupported explicit IO variable mode");
995 }
996
997 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
998
999 if (value->bit_size == 1) {
1000 /* For shared, we can go ahead and use NIR's and/or the back-end's
1001 * standard encoding for booleans rather than forcing a 0/1 boolean.
1002 * This should save an instruction or two.
1003 *
1004 * TODO: Make the native bool bit_size an option.
1005 */
1006 if (mode == nir_var_mem_shared)
1007 value = nir_b2b32(b, value);
1008 else
1009 value = nir_b2i(b, value, 32);
1010 }
1011
1012 store->src[0] = nir_src_for_ssa(value);
1013 if (addr_format_is_global(addr_format)) {
1014 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1015 } else if (addr_format == nir_address_format_32bit_offset) {
1016 assert(addr->num_components == 1);
1017 store->src[1] = nir_src_for_ssa(addr);
1018 } else {
1019 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1020 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1021 }
1022
1023 nir_intrinsic_set_write_mask(store, write_mask);
1024
1025 if (mode != nir_var_mem_shared)
1026 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1027
1028 /* TODO: We should try and provide a better alignment. For OpenCL, we need
1029 * to plumb the alignment through from SPIR-V when we have one.
1030 */
1031 nir_intrinsic_set_align(store, value->bit_size / 8, 0);
1032
1033 assert(value->num_components == 1 ||
1034 value->num_components == intrin->num_components);
1035 store->num_components = value->num_components;
1036
1037 assert(value->bit_size % 8 == 0);
1038
1039 if (addr_format_needs_bounds_check(addr_format)) {
1040 const unsigned store_size = (value->bit_size / 8) * store->num_components;
1041 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1042
1043 nir_builder_instr_insert(b, &store->instr);
1044
1045 nir_pop_if(b, NULL);
1046 } else {
1047 nir_builder_instr_insert(b, &store->instr);
1048 }
1049 }
1050
1051 static nir_ssa_def *
1052 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1053 nir_ssa_def *addr, nir_address_format addr_format)
1054 {
1055 nir_variable_mode mode = nir_src_as_deref(intrin->src[0])->mode;
1056 const unsigned num_data_srcs =
1057 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1058
1059 nir_intrinsic_op op;
1060 switch (mode) {
1061 case nir_var_mem_ssbo:
1062 if (addr_format_is_global(addr_format))
1063 op = global_atomic_for_deref(intrin->intrinsic);
1064 else
1065 op = ssbo_atomic_for_deref(intrin->intrinsic);
1066 break;
1067 case nir_var_mem_global:
1068 assert(addr_format_is_global(addr_format));
1069 op = global_atomic_for_deref(intrin->intrinsic);
1070 break;
1071 case nir_var_mem_shared:
1072 assert(addr_format == nir_address_format_32bit_offset);
1073 op = shared_atomic_for_deref(intrin->intrinsic);
1074 break;
1075 default:
1076 unreachable("Unsupported explicit IO variable mode");
1077 }
1078
1079 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1080
1081 unsigned src = 0;
1082 if (addr_format_is_global(addr_format)) {
1083 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1084 } else if (addr_format == nir_address_format_32bit_offset) {
1085 assert(addr->num_components == 1);
1086 atomic->src[src++] = nir_src_for_ssa(addr);
1087 } else {
1088 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1089 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1090 }
1091 for (unsigned i = 0; i < num_data_srcs; i++) {
1092 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1093 }
1094
1095 /* Global atomics don't have access flags because they assume that the
1096 * address may be non-uniform.
1097 */
1098 if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared)
1099 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1100
1101 assert(intrin->dest.ssa.num_components == 1);
1102 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1103 1, intrin->dest.ssa.bit_size, intrin->dest.ssa.name);
1104
1105 assert(atomic->dest.ssa.bit_size % 8 == 0);
1106
1107 if (addr_format_needs_bounds_check(addr_format)) {
1108 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1109 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1110
1111 nir_builder_instr_insert(b, &atomic->instr);
1112
1113 nir_pop_if(b, NULL);
1114 return nir_if_phi(b, &atomic->dest.ssa,
1115 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1116 } else {
1117 nir_builder_instr_insert(b, &atomic->instr);
1118 return &atomic->dest.ssa;
1119 }
1120 }
1121
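/* Builds the address for one deref link in the given address format: array
 * and ptr_as_array derefs add index * stride to base_addr, struct derefs add
 * the field's explicit byte offset, casts pass the address through, and
 * variables start from data.driver_location.
 */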
1122 nir_ssa_def *
1123 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1124 nir_ssa_def *base_addr,
1125 nir_address_format addr_format)
1126 {
1127 assert(deref->dest.is_ssa);
1128 switch (deref->deref_type) {
1129 case nir_deref_type_var:
1130 assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared));
1131 return nir_imm_intN_t(b, deref->var->data.driver_location,
1132 deref->dest.ssa.bit_size);
1133
1134 case nir_deref_type_array: {
1135 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1136
1137 unsigned stride = glsl_get_explicit_stride(parent->type);
1138 if ((glsl_type_is_matrix(parent->type) &&
1139 glsl_matrix_type_is_row_major(parent->type)) ||
1140 (glsl_type_is_vector(parent->type) && stride == 0))
1141 stride = type_scalar_size_bytes(parent->type);
1142
1143 assert(stride > 0);
1144
1145 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1146 index = nir_i2i(b, index, base_addr->bit_size);
1147 return build_addr_iadd(b, base_addr, addr_format,
1148 nir_amul_imm(b, index, stride));
1149 }
1150
1151 case nir_deref_type_ptr_as_array: {
1152 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1153 index = nir_i2i(b, index, base_addr->bit_size);
1154 unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
1155 return build_addr_iadd(b, base_addr, addr_format,
1156 nir_amul_imm(b, index, stride));
1157 }
1158
1159 case nir_deref_type_array_wildcard:
1160 unreachable("Wildcards should be lowered by now");
1161 break;
1162
1163 case nir_deref_type_struct: {
1164 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1165 int offset = glsl_get_struct_field_offset(parent->type,
1166 deref->strct.index);
1167 assert(offset >= 0);
1168 return build_addr_iadd_imm(b, base_addr, addr_format, offset);
1169 }
1170
1171 case nir_deref_type_cast:
1172 /* Nothing to do here */
1173 return base_addr;
1174 }
1175
1176 unreachable("Invalid NIR deref type");
1177 }
1178
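/* Lowers a single load/store/atomic deref intrinsic whose address has already
 * been computed.  When the vector type carries an explicit stride larger than
 * its scalar size, loads and stores are split into per-component accesses at
 * addr + i * vec_stride; otherwise one vector-wide access is emitted.
 */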
1179 void
1180 nir_lower_explicit_io_instr(nir_builder *b,
1181 nir_intrinsic_instr *intrin,
1182 nir_ssa_def *addr,
1183 nir_address_format addr_format)
1184 {
1185 b->cursor = nir_after_instr(&intrin->instr);
1186
1187 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1188 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1189 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1190 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1191 assert(vec_stride == 0 || vec_stride >= scalar_size);
1192
1193 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1194 nir_ssa_def *value;
1195 if (vec_stride > scalar_size) {
1196 nir_ssa_def *comps[4] = { NULL, };
1197 for (unsigned i = 0; i < intrin->num_components; i++) {
1198 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1199 vec_stride * i);
1200 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1201 addr_format, 1);
1202 }
1203 value = nir_vec(b, comps, intrin->num_components);
1204 } else {
1205 value = build_explicit_io_load(b, intrin, addr, addr_format,
1206 intrin->num_components);
1207 }
1208 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1209 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
1210 assert(intrin->src[1].is_ssa);
1211 nir_ssa_def *value = intrin->src[1].ssa;
1212 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1213 if (vec_stride > scalar_size) {
1214 for (unsigned i = 0; i < intrin->num_components; i++) {
1215 if (!(write_mask & (1 << i)))
1216 continue;
1217
1218 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1219 vec_stride * i);
1220 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1221 nir_channel(b, value, i), 1);
1222 }
1223 } else {
1224 build_explicit_io_store(b, intrin, addr, addr_format,
1225 value, write_mask);
1226 }
1227 } else {
1228 nir_ssa_def *value =
1229 build_explicit_io_atomic(b, intrin, addr, addr_format);
1230 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(value));
1231 }
1232
1233 nir_instr_remove(&intrin->instr);
1234 }
1235
1236 static void
1237 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
1238 nir_address_format addr_format)
1239 {
1240 /* Just delete the deref if it's not used. We can't use
1241 * nir_deref_instr_remove_if_unused here because it may remove more than
1242 * one deref which could break our list walking since we walk the list
1243 * backwards.
1244 */
1245 assert(list_is_empty(&deref->dest.ssa.if_uses));
1246 if (list_is_empty(&deref->dest.ssa.uses)) {
1247 nir_instr_remove(&deref->instr);
1248 return;
1249 }
1250
1251 b->cursor = nir_after_instr(&deref->instr);
1252
1253 nir_ssa_def *base_addr = NULL;
1254 if (deref->deref_type != nir_deref_type_var) {
1255 assert(deref->parent.is_ssa);
1256 base_addr = deref->parent.ssa;
1257 }
1258
1259 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
1260 addr_format);
1261
1262 nir_instr_remove(&deref->instr);
1263 nir_ssa_def_rewrite_uses(&deref->dest.ssa, nir_src_for_ssa(addr));
1264 }
1265
1266 static void
1267 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
1268 nir_address_format addr_format)
1269 {
1270 assert(intrin->src[0].is_ssa);
1271 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
1272 }
1273
1274 static void
1275 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1276 nir_address_format addr_format)
1277 {
1278 b->cursor = nir_after_instr(&intrin->instr);
1279
1280 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1281
1282 assert(glsl_type_is_array(deref->type));
1283 assert(glsl_get_length(deref->type) == 0);
1284 unsigned stride = glsl_get_explicit_stride(deref->type);
1285 assert(stride > 0);
1286
1287 assert(addr_format == nir_address_format_32bit_index_offset);
1288 nir_ssa_def *addr = &deref->dest.ssa;
1289 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
1290 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
1291
1292 nir_intrinsic_instr *bsize =
1293 nir_intrinsic_instr_create(b->shader, nir_intrinsic_get_buffer_size);
1294 bsize->src[0] = nir_src_for_ssa(index);
1295 nir_ssa_dest_init(&bsize->instr, &bsize->dest, 1, 32, NULL);
1296 nir_builder_instr_insert(b, &bsize->instr);
1297
1298 nir_ssa_def *arr_size =
1299 nir_idiv(b, nir_isub(b, &bsize->dest.ssa, offset),
1300 nir_imm_int(b, stride));
1301
1302 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(arr_size));
1303 nir_instr_remove(&intrin->instr);
1304 }
1305
1306 static bool
1307 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
1308 nir_address_format addr_format)
1309 {
1310 bool progress = false;
1311
1312 nir_builder b;
1313 nir_builder_init(&b, impl);
1314
1315 /* Walk in reverse order so that we can see the full deref chain when we
1316 * lower the access operations. We lower them assuming that the derefs
1317 * will be turned into address calculations later.
1318 */
1319 nir_foreach_block_reverse(block, impl) {
1320 nir_foreach_instr_reverse_safe(instr, block) {
1321 switch (instr->type) {
1322 case nir_instr_type_deref: {
1323 nir_deref_instr *deref = nir_instr_as_deref(instr);
1324 if (deref->mode & modes) {
1325 lower_explicit_io_deref(&b, deref, addr_format);
1326 progress = true;
1327 }
1328 break;
1329 }
1330
1331 case nir_instr_type_intrinsic: {
1332 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1333 switch (intrin->intrinsic) {
1334 case nir_intrinsic_load_deref:
1335 case nir_intrinsic_store_deref:
1336 case nir_intrinsic_deref_atomic_add:
1337 case nir_intrinsic_deref_atomic_imin:
1338 case nir_intrinsic_deref_atomic_umin:
1339 case nir_intrinsic_deref_atomic_imax:
1340 case nir_intrinsic_deref_atomic_umax:
1341 case nir_intrinsic_deref_atomic_and:
1342 case nir_intrinsic_deref_atomic_or:
1343 case nir_intrinsic_deref_atomic_xor:
1344 case nir_intrinsic_deref_atomic_exchange:
1345 case nir_intrinsic_deref_atomic_comp_swap:
1346 case nir_intrinsic_deref_atomic_fadd:
1347 case nir_intrinsic_deref_atomic_fmin:
1348 case nir_intrinsic_deref_atomic_fmax:
1349 case nir_intrinsic_deref_atomic_fcomp_swap: {
1350 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1351 if (deref->mode & modes) {
1352 lower_explicit_io_access(&b, intrin, addr_format);
1353 progress = true;
1354 }
1355 break;
1356 }
1357
1358 case nir_intrinsic_deref_buffer_array_length: {
1359 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1360 if (deref->mode & modes) {
1361 lower_explicit_io_array_length(&b, intrin, addr_format);
1362 progress = true;
1363 }
1364 break;
1365 }
1366
1367 default:
1368 break;
1369 }
1370 break;
1371 }
1372
1373 default:
1374 /* Nothing to do */
1375 break;
1376 }
1377 }
1378 }
1379
1380 if (progress) {
1381 nir_metadata_preserve(impl, nir_metadata_block_index |
1382 nir_metadata_dominance);
1383 }
1384
1385 return progress;
1386 }
1387
1388 bool
1389 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
1390 nir_address_format addr_format)
1391 {
1392 bool progress = false;
1393
1394 nir_foreach_function(function, shader) {
1395 if (function->impl &&
1396 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
1397 progress = true;
1398 }
1399
1400 return progress;
1401 }
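
/* Illustrative use for SSBOs addressed with raw 64-bit pointers (the address
 * format is a per-driver choice):
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ssbo,
 *                          nir_address_format_64bit_global);
 */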
1402
1403 static bool
1404 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
1405 nir_variable_mode modes,
1406 glsl_type_size_align_func type_info)
1407 {
1408 bool progress = false;
1409
1410 nir_foreach_block(block, impl) {
1411 nir_foreach_instr(instr, block) {
1412 if (instr->type != nir_instr_type_deref)
1413 continue;
1414
1415 nir_deref_instr *deref = nir_instr_as_deref(instr);
1416 if (!(deref->mode & modes))
1417 continue;
1418
1419 unsigned size, alignment;
1420 const struct glsl_type *new_type =
1421 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
1422 if (new_type != deref->type) {
1423 progress = true;
1424 deref->type = new_type;
1425 }
1426 if (deref->deref_type == nir_deref_type_cast) {
1427 /* See also glsl_type::get_explicit_type_for_size_align() */
1428 unsigned new_stride = align(size, alignment);
1429 if (new_stride != deref->cast.ptr_stride) {
1430 deref->cast.ptr_stride = new_stride;
1431 progress = true;
1432 }
1433 }
1434 }
1435 }
1436
1437 if (progress) {
1438 nir_metadata_preserve(impl, nir_metadata_block_index |
1439 nir_metadata_dominance |
1440 nir_metadata_live_ssa_defs |
1441 nir_metadata_loop_analysis);
1442 }
1443
1444 return progress;
1445 }
1446
1447 static bool
1448 lower_vars_to_explicit(nir_shader *shader,
1449 struct exec_list *vars, nir_variable_mode mode,
1450 glsl_type_size_align_func type_info)
1451 {
1452 bool progress = false;
1453 unsigned offset = 0;
1454 nir_foreach_variable(var, vars) {
1455 unsigned size, align;
1456 const struct glsl_type *explicit_type =
1457 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
1458
1459 if (explicit_type != var->type) {
1460 progress = true;
1461 var->type = explicit_type;
1462 }
1463
1464 var->data.driver_location = ALIGN_POT(offset, align);
1465 offset = var->data.driver_location + size;
1466 }
1467
1468 if (mode == nir_var_mem_shared) {
1469 shader->info.cs.shared_size = offset;
1470 shader->num_shared = offset;
1471 }
1472
1473 return progress;
1474 }
1475
1476 bool
1477 nir_lower_vars_to_explicit_types(nir_shader *shader,
1478 nir_variable_mode modes,
1479 glsl_type_size_align_func type_info)
1480 {
1481 /* TODO: Situations which need to be handled to support more modes:
1482 * - row-major matrices
1483 * - compact shader inputs/outputs
1484 * - interface types
1485 */
1486 ASSERTED nir_variable_mode supported = nir_var_mem_shared |
1487 nir_var_shader_temp | nir_var_function_temp;
1488 assert(!(modes & ~supported) && "unsupported");
1489
1490 bool progress = false;
1491
1492 if (modes & nir_var_mem_shared)
1493 progress |= lower_vars_to_explicit(shader, &shader->shared, nir_var_mem_shared, type_info);
1494 if (modes & nir_var_shader_temp)
1495 progress |= lower_vars_to_explicit(shader, &shader->globals, nir_var_shader_temp, type_info);
1496
1497 nir_foreach_function(function, shader) {
1498 if (function->impl) {
1499 if (modes & nir_var_function_temp)
1500 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
1501
1502 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
1503 }
1504 }
1505
1506 return progress;
1507 }
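
/* Illustrative use for shared memory, assuming a natural-layout size/align
 * callback such as glsl_get_natural_size_align_bytes():
 *
 *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
 *                                     glsl_get_natural_size_align_bytes);
 */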
1508
1509 /**
1510 * Return the offset source for a load/store intrinsic.
1511 */
1512 nir_src *
1513 nir_get_io_offset_src(nir_intrinsic_instr *instr)
1514 {
1515 switch (instr->intrinsic) {
1516 case nir_intrinsic_load_input:
1517 case nir_intrinsic_load_output:
1518 case nir_intrinsic_load_shared:
1519 case nir_intrinsic_load_uniform:
1520 case nir_intrinsic_load_global:
1521 case nir_intrinsic_load_scratch:
1522 case nir_intrinsic_load_fs_input_interp_deltas:
1523 return &instr->src[0];
1524 case nir_intrinsic_load_ubo:
1525 case nir_intrinsic_load_ssbo:
1526 case nir_intrinsic_load_per_vertex_input:
1527 case nir_intrinsic_load_per_vertex_output:
1528 case nir_intrinsic_load_interpolated_input:
1529 case nir_intrinsic_store_output:
1530 case nir_intrinsic_store_shared:
1531 case nir_intrinsic_store_global:
1532 case nir_intrinsic_store_scratch:
1533 case nir_intrinsic_ssbo_atomic_add:
1534 case nir_intrinsic_ssbo_atomic_imin:
1535 case nir_intrinsic_ssbo_atomic_umin:
1536 case nir_intrinsic_ssbo_atomic_imax:
1537 case nir_intrinsic_ssbo_atomic_umax:
1538 case nir_intrinsic_ssbo_atomic_and:
1539 case nir_intrinsic_ssbo_atomic_or:
1540 case nir_intrinsic_ssbo_atomic_xor:
1541 case nir_intrinsic_ssbo_atomic_exchange:
1542 case nir_intrinsic_ssbo_atomic_comp_swap:
1543 case nir_intrinsic_ssbo_atomic_fadd:
1544 case nir_intrinsic_ssbo_atomic_fmin:
1545 case nir_intrinsic_ssbo_atomic_fmax:
1546 case nir_intrinsic_ssbo_atomic_fcomp_swap:
1547 return &instr->src[1];
1548 case nir_intrinsic_store_ssbo:
1549 case nir_intrinsic_store_per_vertex_output:
1550 return &instr->src[2];
1551 default:
1552 return NULL;
1553 }
1554 }
1555
1556 /**
1557 * Return the vertex index source for a load/store per_vertex intrinsic.
1558 */
1559 nir_src *
1560 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
1561 {
1562 switch (instr->intrinsic) {
1563 case nir_intrinsic_load_per_vertex_input:
1564 case nir_intrinsic_load_per_vertex_output:
1565 return &instr->src[0];
1566 case nir_intrinsic_store_per_vertex_output:
1567 return &instr->src[1];
1568 default:
1569 return NULL;
1570 }
1571 }
1572
1573 /**
1574  * Return the numeric constant that identifies a NULL pointer for each
1575  * address format.
1576 */
1577 const nir_const_value *
1578 nir_address_format_null_value(nir_address_format addr_format)
1579 {
1580    static const nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
1581 [nir_address_format_32bit_global] = {{0}},
1582 [nir_address_format_64bit_global] = {{0}},
1583 [nir_address_format_64bit_bounded_global] = {{0}},
1584 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
1585 [nir_address_format_32bit_offset] = {{.u32 = ~0}},
1586 [nir_address_format_logical] = {{.u32 = ~0}},
1587 };
1588
1589 assert(addr_format < ARRAY_SIZE(null_values));
1590 return null_values[addr_format];
1591 }
1592
1593 nir_ssa_def *
1594 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1595 nir_address_format addr_format)
1596 {
1597 switch (addr_format) {
1598 case nir_address_format_32bit_global:
1599 case nir_address_format_64bit_global:
1600 case nir_address_format_64bit_bounded_global:
1601 case nir_address_format_32bit_index_offset:
1602 case nir_address_format_32bit_offset:
1603 return nir_ball_iequal(b, addr0, addr1);
1604
1605 case nir_address_format_logical:
1606 unreachable("Unsupported address format");
1607 }
1608
1609 unreachable("Invalid address format");
1610 }
1611
1612 nir_ssa_def *
1613 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
1614 nir_address_format addr_format)
1615 {
1616 switch (addr_format) {
1617 case nir_address_format_32bit_global:
1618 case nir_address_format_64bit_global:
1619 case nir_address_format_32bit_offset:
1620 assert(addr0->num_components == 1);
1621 assert(addr1->num_components == 1);
1622 return nir_isub(b, addr0, addr1);
1623
1624 case nir_address_format_64bit_bounded_global:
1625 return nir_isub(b, addr_to_global(b, addr0, addr_format),
1626 addr_to_global(b, addr1, addr_format));
1627
1628 case nir_address_format_32bit_index_offset:
1629 assert(addr0->num_components == 2);
1630 assert(addr1->num_components == 2);
1631 /* Assume the same buffer index. */
1632 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
1633
1634 case nir_address_format_logical:
1635 unreachable("Unsupported address format");
1636 }
1637
1638 unreachable("Invalid address format");
1639 }
1640
1641 static bool
1642 is_input(nir_intrinsic_instr *intrin)
1643 {
1644 return intrin->intrinsic == nir_intrinsic_load_input ||
1645 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
1646 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
1647 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
1648 }
1649
1650 static bool
1651 is_output(nir_intrinsic_instr *intrin)
1652 {
1653 return intrin->intrinsic == nir_intrinsic_load_output ||
1654 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
1655 intrin->intrinsic == nir_intrinsic_store_output ||
1656 intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
1657 }
1658
1659
1660 /**
1661 * This pass adds constant offsets to instr->const_index[0] for input/output
1662 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
1663 * unchanged - since we don't know what part of a compound variable is
1664 * accessed, we allocate storage for the entire thing. For drivers that use
1665 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
1666 * the offset source will be 0, so that they don't have to add it in manually.
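 *
 * For example (illustrative), a load_input with base == 4 and a constant
 * offset source of 2 becomes a load_input with base == 6 and an offset
 * source of 0.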
1667 */
1668
1669 static bool
1670 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
1671 nir_variable_mode mode)
1672 {
1673 bool progress = false;
1674 nir_foreach_instr_safe(instr, block) {
1675 if (instr->type != nir_instr_type_intrinsic)
1676 continue;
1677
1678 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1679
1680 if ((mode == nir_var_shader_in && is_input(intrin)) ||
1681 (mode == nir_var_shader_out && is_output(intrin))) {
1682 nir_src *offset = nir_get_io_offset_src(intrin);
1683
1684 if (nir_src_is_const(*offset)) {
1685 intrin->const_index[0] += nir_src_as_uint(*offset);
1686 b->cursor = nir_before_instr(&intrin->instr);
1687 nir_instr_rewrite_src(&intrin->instr, offset,
1688 nir_src_for_ssa(nir_imm_int(b, 0)));
1689 progress = true;
1690 }
1691 }
1692 }
1693
1694 return progress;
1695 }
1696
1697 bool
1698 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
1699 {
1700 bool progress = false;
1701
1702 nir_foreach_function(f, nir) {
1703 if (f->impl) {
1704 nir_builder b;
1705 nir_builder_init(&b, f->impl);
1706 nir_foreach_block(block, f->impl) {
1707 progress |= add_const_offset_to_base_block(block, &b, mode);
1708 }
1709 }
1710 }
1711
1712 return progress;
1713 }
1714