nir: Remove some no longer needed asserts
[mesa.git] / src/compiler/nir/nir_lower_io.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */
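
/*
 * Typical usage (an illustrative sketch, not taken from this file's callers):
 * a driver invokes the pass with the variable modes it wants lowered plus a
 * type_size callback that reports how many slots a glsl_type occupies in
 * that driver's layout, e.g.
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 driver_type_size, 0);
 *
 * where driver_type_size is a hypothetical driver-provided callback with the
 * int (*)(const struct glsl_type *) signature used below.
 */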

#include "nir.h"
#include "nir_builder.h"

struct lower_io_state {
   nir_builder builder;
   void *mem_ctx;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         unsigned base_offset,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   /* There are 32 regular and 32 patch varyings allowed */
   int locations[64][2];
   for (unsigned i = 0; i < 64; i++) {
      for (unsigned j = 0; j < 2; j++)
         locations[i][j] = -1;
   }

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * towards the number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      /* Make sure we give the same location to varyings packed with
       * ARB_enhanced_layouts.
       */
      int idx = var->data.location - base_offset;
      if (base_offset && idx >= 0) {
         assert(idx < ARRAY_SIZE(locations));

         if (locations[idx][var->data.index] == -1) {
            var->data.driver_location = location;
            locations[idx][var->data.index] = location;

            /* A dvec3 can be packed with a double; we need special handling
             * for this as we are packing across two locations.
             */
            if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE &&
                glsl_get_vector_elements(var->type) == 3) {
               /* Hack around type_size functions that expect vectors to be
                * padded out to vec4.  If a float is the same size as a
                * double then the type size is already padded out to vec4;
                * otherwise set the offset to two doubles, which moves the
                * location past the first two components of the dvec3 that
                * were stored at the previous location.
                */
               unsigned dsize = type_size(glsl_double_type());
               unsigned offset =
                  dsize == type_size(glsl_float_type()) ? dsize : dsize * 2;

               locations[idx + 1][var->data.index] = location + offset;
            }

            location += type_size(var->type);
         } else {
            var->data.driver_location = locations[idx][var->data.index];
         }
      } else {
         var->data.driver_location = location;
         location += type_size(var->type);
      }
   }

   *size = location;
}
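
/*
 * Illustrative example (not from the original comments): with a hypothetical
 * type_size callback that counts vec4 slots, a list containing "uniform vec4 a;"
 * followed by "uniform mat4 b;" would get a.driver_location == 0,
 * b.driver_location == 1 and *size == 5, since the mat4 occupies four
 * consecutive slots after the vec4.
 */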

/**
 * Returns true if we're processing a stage whose inputs are arrays indexed
 * by a vertex number (such as geometry shader inputs).
 */
static bool
is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;

   return var->data.mode == nir_var_shader_in && !var->data.patch &&
          (stage == MESA_SHADER_TESS_CTRL ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY);
}

static bool
is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;
   return var->data.mode == nir_var_shader_out && !var->data.patch &&
          stage == MESA_SHADER_TESS_CTRL;
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *))
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}
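
/*
 * Illustrative example (not from the original comments): for a per-vertex
 * input whose per-vertex type is struct { vec4 a; float b[2]; }, a deref
 * chain of the form var[v].b[i] produces *vertex_index == v and
 * offset == type_size of the vec4 field plus i times the type_size of a
 * float, with the indirect i term emitted as an imul that later constant
 * folding can clean up.
 */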

static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, var->data.location_frac);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}
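
/*
 * Illustrative note (not from the original comments): with
 * use_interpolated_input_intrinsics set, a load_var of a non-flat
 * fragment-shader input is replaced by a load_barycentric_* intrinsic
 * (pixel, centroid or sample, per the variable's qualifiers) feeding a
 * load_interpolated_input whose base and component come from the variable.
 * Flat inputs and the other stages keep the plain load_input /
 * load_per_vertex_input / load_uniform / load_shared forms.
 */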

static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, var->data.location_frac);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}
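
/*
 * Illustrative note (not from the original comments): the lowered store
 * takes the value being written as src[0]; for the per-vertex case the
 * vertex index follows as src[1] and the offset as src[2], otherwise the
 * offset is src[1].  nir_get_io_offset_src() and
 * nir_get_io_vertex_index_src() at the end of this file encode the same
 * layout.
 */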

static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->mem_ctx, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
      nir_src_copy(&atomic->src[i+1], &intrin->src[i], atomic);
   }

   return atomic;
}

static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, offset);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->mem_ctx, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->mem_ctx,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, var->data.location_frac);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the IO for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
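         /* Otherwise fall through and skip it like any other unhandled
          * intrinsic.
          */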
      default:
         /* We can't lower the IO for this NIR intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex =
         is_per_vertex_input(state, var) || is_per_vertex_output(state, var);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, state->mem_ctx);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
   }

   return true;
}

static void
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;

   nir_builder_init(&state.builder, impl);
   state.mem_ctx = ralloc_parent(impl);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

void
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_lower_io_impl(function->impl, modes, type_size, options);
      }
   }
}

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}