nir: Add a flag to lower_io to force "sample" interpolation
[mesa.git] / src / compiler / nir / nir_lower_io.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"

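/* State shared by the helpers below.  "modes" selects which variable modes
 * are lowered, "type_size" is the driver-provided callback that measures a
 * GLSL type in whatever units the driver counts locations in, and "options"
 * carries nir_lower_io_options flags such as
 * nir_lower_io_force_sample_interpolation.
 */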
struct lower_io_state {
   nir_builder builder;
   void *mem_ctx;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

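/* Assigns a driver_location to every variable in var_list, using type_size
 * to determine how many units each variable occupies.  For varyings
 * (non-zero base_offset), the locations[][] table below ensures that
 * variables packed into the same location by ARB_enhanced_layouts share a
 * single driver_location.
 */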
void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         unsigned base_offset,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   /* There are 32 regular and 32 patch varyings allowed */
   int locations[64][2];
   for (unsigned i = 0; i < 64; i++) {
      for (unsigned j = 0; j < 2; j++)
         locations[i][j] = -1;
   }

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs have their own address spaces, so don't count them towards the
       * number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      /* Make sure we give the same location to varyings packed with
       * ARB_enhanced_layouts.
       */
      int idx = var->data.location - base_offset;
      if (base_offset && idx >= 0) {
         assert(idx < ARRAY_SIZE(locations));

         if (locations[idx][var->data.index] == -1) {
            var->data.driver_location = location;
            locations[idx][var->data.index] = location;

            /* A dvec3 can be packed with a double.  We need special handling
             * for this as we are packing across two locations.
             */
            if (glsl_get_base_type(var->type) == GLSL_TYPE_DOUBLE &&
                glsl_get_vector_elements(var->type) == 3) {
               /* Hack around type_size functions that expect vectors to be
                * padded out to vec4.  If a float type is the same size as a
                * double then the type size is padded to vec4 units; otherwise
                * set the offset to two doubles, which moves the location
                * past the first two components of the dvec3 that were stored
                * at the previous location.
                */
               unsigned dsize = type_size(glsl_double_type());
               unsigned offset =
                  dsize == type_size(glsl_float_type()) ? dsize : dsize * 2;

               locations[idx + 1][var->data.index] = location + offset;
            }

            location += type_size(var->type);
         } else {
            var->data.driver_location = locations[idx][var->data.index];
         }
      } else {
         var->data.driver_location = location;
         location += type_size(var->type);
      }
   }

   *size = location;
}

/**
 * Returns true if we're processing a stage whose inputs are arrays indexed
 * by a vertex number (such as geometry shader inputs).
 */
static bool
is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;

   return var->data.mode == nir_var_shader_in && !var->data.patch &&
          (stage == MESA_SHADER_TESS_CTRL ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY);
}

static bool
is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
{
   gl_shader_stage stage = state->builder.shader->stage;
   return var->data.mode == nir_var_shader_out && !var->data.patch &&
          stage == MESA_SHADER_TESS_CTRL;
}

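/* Walks the deref chain of an I/O access and builds an SSA expression for
 * its offset in type_size units: array derefs add element_size * index
 * (with the indirect part, if any, added separately) and struct derefs add
 * the summed sizes of the preceding fields.  If vertex_index is non-NULL,
 * the outermost array index is returned through it as a separate per-vertex
 * index rather than being folded into the offset.
 */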
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *))
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      assert(tail->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}

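/* Turns a load_var into the matching input/output/uniform/shared load
 * intrinsic.  For fragment shader inputs, when the backend has set
 * use_interpolated_input_intrinsics, non-flat inputs become a
 * load_barycentric_* intrinsic followed by load_interpolated_input.  The
 * barycentric op normally follows the variable's sample/centroid
 * qualifiers, but nir_lower_io_force_sample_interpolation overrides the
 * choice to load_barycentric_sample, for drivers that force per-sample
 * shading.
 */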
static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, var->data.location_frac);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

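/* Turns a store_var into store_output, store_per_vertex_output, or
 * store_shared, carrying over the value, write mask, base location, and
 * component.
 */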
static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, var->data.location_frac);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

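/* Turns a var_atomic_* on a shared variable into the corresponding
 * shared_atomic_* intrinsic, with the computed offset as the first source
 * and the original data sources shifted up by one.
 */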
static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->mem_ctx, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0;
        i < nir_intrinsic_infos[intrin->intrinsic].num_srcs;
        i++) {
      nir_src_copy(&atomic->src[i+1], &intrin->src[i], atomic);
   }

   return atomic;
}

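/* Turns interp_var_at_*() into an explicit barycentric setup intrinsic
 * followed by load_interpolated_input.  interp_var_at_centroid normally
 * maps to load_barycentric_centroid, but
 * nir_lower_io_force_sample_interpolation redirects it to
 * load_barycentric_sample so it stays consistent with the per-sample inputs
 * produced by lower_load().  Flat inputs are lowered to a plain load, since
 * interpolation qualifiers have no effect on them.
 */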
static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, offset);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->mem_ctx, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->mem_ctx,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, var->data.location_frac);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

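/* Replaces every variable-based I/O intrinsic in the block whose mode is
 * selected in state->modes with the offset-based equivalent built by the
 * helpers above, rewriting all uses of the old destination to the new one.
 */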
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the io for this nir intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
         /* fallthrough */
      default:
         /* We can't lower the io for this nir intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex =
         is_per_vertex_input(state, var) || is_per_vertex_output(state, var);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, state->mem_ctx);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
   }

   return true;
}

static void
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;

   nir_builder_init(&state.builder, impl);
   state.mem_ctx = ralloc_parent(impl);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

void
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_lower_io_impl(function->impl, modes, type_size, options);
      }
   }
}
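
/* Example (sketch): a driver that lowers fragment inputs and wants every
 * non-flat input interpolated per-sample might run the pass as
 *
 *    nir_lower_io(shader, nir_var_shader_in, driver_type_size,
 *                 nir_lower_io_force_sample_interpolation);
 *
 * where driver_type_size stands in for whatever type_size callback the
 * driver actually provides (it is not defined in this file).  Passing 0 for
 * the options argument keeps the old behaviour of honouring each variable's
 * own sample/centroid qualifiers.
 */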

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}