radeonsi: set better tessellation tunables on gfx9 and gfx10
[mesa.git] src/gallium/drivers/radeonsi/si_shader_nir.c
/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_shader_internal.h"
#include "si_pipe.h"

#include "ac_nir_to_llvm.h"

#include "tgsi/tgsi_from_mesa.h"

#include "compiler/nir/nir.h"
#include "compiler/nir_types.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_deref.h"

static const nir_deref_instr *tex_get_texture_deref(nir_tex_instr *instr)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_texture_deref:
         return nir_src_as_deref(instr->src[i].src);
      default:
         break;
      }
   }

   return NULL;
}

static nir_variable* intrinsic_get_var(nir_intrinsic_instr *instr)
{
   return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0]));
}

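/* Walk a deref chain and accumulate which components of an I/O variable
 * are used. usage_mask holds one 4-bit component mask per vec4 slot; an
 * 8-bit input mask may span two consecutive slots (the high nibble lands
 * in location + 1, which happens for 64-bit types). An array index that
 * is not a compile-time constant conservatively marks every element of
 * the array as used.
 */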
static void gather_usage_helper(const nir_deref_instr **deref_ptr,
                                unsigned location,
                                uint8_t mask,
                                uint8_t *usage_mask)
{
   for (; *deref_ptr; deref_ptr++) {
      const nir_deref_instr *deref = *deref_ptr;
      switch (deref->deref_type) {
      case nir_deref_type_array: {
         unsigned elem_size =
            glsl_count_attribute_slots(deref->type, false);
         if (nir_src_is_const(deref->arr.index)) {
            location += elem_size * nir_src_as_uint(deref->arr.index);
         } else {
            unsigned array_elems =
               glsl_get_length(deref_ptr[-1]->type);
            for (unsigned i = 0; i < array_elems; i++) {
               gather_usage_helper(deref_ptr + 1,
                                   location + elem_size * i,
                                   mask, usage_mask);
            }
            return;
         }
         break;
      }
      case nir_deref_type_struct: {
         const struct glsl_type *parent_type =
            deref_ptr[-1]->type;
         unsigned index = deref->strct.index;
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            location += glsl_count_attribute_slots(ft, false);
         }
         break;
      }
      default:
         unreachable("Unhandled deref type in gather_usage_helper");
      }
   }

   usage_mask[location] |= mask & 0xf;
   if (mask & 0xf0)
      usage_mask[location + 1] |= (mask >> 4) & 0xf;
}

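/* Compute the component mask for one input/output access and record it.
 * The variable's location_frac is folded into the mask, and 64-bit types
 * double up: each 64-bit channel occupies two 32-bit components, so e.g.
 * reading .xy of a dvec2 turns mask 0b0011 into 0b1111 before it is
 * handed to gather_usage_helper().
 */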
static void gather_usage(const nir_deref_instr *deref,
                         uint8_t mask,
                         uint8_t *usage_mask)
{
   nir_deref_path path;
   nir_deref_path_init(&path, (nir_deref_instr *)deref, NULL);

   unsigned location_frac = path.path[0]->var->data.location_frac;
   if (glsl_type_is_64bit(deref->type)) {
      uint8_t new_mask = 0;
      for (unsigned i = 0; i < 4; i++) {
         if (mask & (1 << i))
            new_mask |= 0x3 << (2 * i);
      }
      mask = new_mask << location_frac;
   } else {
      mask <<= location_frac;
      mask &= 0xf;
   }

   gather_usage_helper((const nir_deref_instr **)&path.path[1],
                       path.path[0]->var->data.driver_location,
                       mask, usage_mask);

   nir_deref_path_finish(&path);
}

static void gather_intrinsic_load_deref_input_info(const nir_shader *nir,
                                                   const nir_intrinsic_instr *instr,
                                                   const nir_deref_instr *deref,
                                                   struct si_shader_info *info)
{
   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      gather_usage(deref, nir_ssa_def_components_read(&instr->dest.ssa),
                   info->input_usage_mask);
   default:;
   }
}

static void gather_intrinsic_load_deref_output_info(const nir_shader *nir,
                                                    const nir_intrinsic_instr *instr,
                                                    nir_variable *var,
                                                    struct si_shader_info *info)
{
   assert(var && var->data.mode == nir_var_shader_out);

   switch (nir->info.stage) {
   case MESA_SHADER_TESS_CTRL:
      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
         info->reads_tessfactor_outputs = true;
      else if (var->data.patch)
         info->reads_perpatch_outputs = true;
      else
         info->reads_pervertex_outputs = true;
      break;

   case MESA_SHADER_FRAGMENT:
      if (var->data.fb_fetch_output)
         info->uses_fbfetch = true;
      break;
   default:;
   }
}

static void gather_intrinsic_store_deref_output_info(const nir_shader *nir,
                                                     const nir_intrinsic_instr *instr,
                                                     const nir_deref_instr *deref,
                                                     struct si_shader_info *info)
{
   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX: /* needed by LS, ES */
   case MESA_SHADER_TESS_EVAL: /* needed by ES */
   case MESA_SHADER_GEOMETRY:
      gather_usage(deref, nir_intrinsic_write_mask(instr),
                   info->output_usagemask);
      break;
   default:;
   }
}

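/* Per-instruction scan: derive usage flags (derivatives, system values,
 * interpolation modes, memory writes, ...) from ALU, texture, and
 * intrinsic instructions. Note that only stores and atomics increment
 * num_memory_instructions; plain loads are deliberately not counted.
 */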
static void scan_instruction(const struct nir_shader *nir,
                             struct si_shader_info *info,
                             nir_instr *instr)
{
   if (instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_fddx:
      case nir_op_fddy:
      case nir_op_fddx_fine:
      case nir_op_fddy_fine:
      case nir_op_fddx_coarse:
      case nir_op_fddy_coarse:
         info->uses_derivatives = true;
         break;
      default:
         break;
      }
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      const nir_deref_instr *deref = tex_get_texture_deref(tex);
      nir_variable *var = deref ? nir_deref_instr_get_variable(deref) : NULL;

      if (!var) {
         info->samplers_declared |=
            u_bit_consecutive(tex->sampler_index, 1);
      } else {
         if (deref->mode != nir_var_uniform || var->data.bindless)
            info->uses_bindless_samplers = true;
      }

      switch (tex->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_lod:
         info->uses_derivatives = true;
         break;
      default:
         break;
      }
   } else if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_front_face:
         info->uses_frontface = 1;
         break;
      case nir_intrinsic_load_instance_id:
         info->uses_instanceid = 1;
         break;
      case nir_intrinsic_load_invocation_id:
         info->uses_invocationid = true;
         break;
      case nir_intrinsic_load_num_work_groups:
         info->uses_grid_size = true;
         break;
      case nir_intrinsic_load_local_invocation_index:
      case nir_intrinsic_load_subgroup_id:
      case nir_intrinsic_load_num_subgroups:
         info->uses_subgroup_info = true;
         break;
      case nir_intrinsic_load_local_group_size:
         /* The block size is translated to IMM with a fixed block size. */
         if (info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
            info->uses_block_size = true;
         break;
      case nir_intrinsic_load_local_invocation_id:
      case nir_intrinsic_load_work_group_id: {
         unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
         while (mask) {
            unsigned i = u_bit_scan(&mask);

            if (intr->intrinsic == nir_intrinsic_load_work_group_id)
               info->uses_block_id[i] = true;
            else
               info->uses_thread_id[i] = true;
         }
         break;
      }
      case nir_intrinsic_load_vertex_id:
         info->uses_vertexid = 1;
         break;
      case nir_intrinsic_load_vertex_id_zero_base:
         info->uses_vertexid_nobase = 1;
         break;
      case nir_intrinsic_load_base_vertex:
         info->uses_basevertex = 1;
         break;
      case nir_intrinsic_load_draw_id:
         info->uses_drawid = 1;
         break;
      case nir_intrinsic_load_primitive_id:
         info->uses_primid = 1;
         break;
      case nir_intrinsic_load_sample_mask_in:
         info->reads_samplemask = true;
         break;
      case nir_intrinsic_load_tess_level_inner:
      case nir_intrinsic_load_tess_level_outer:
         info->reads_tess_factors = true;
         break;
      case nir_intrinsic_bindless_image_load:
      case nir_intrinsic_bindless_image_size:
      case nir_intrinsic_bindless_image_samples:
         info->uses_bindless_images = true;
         break;
      case nir_intrinsic_bindless_image_store:
         info->uses_bindless_images = true;
         info->writes_memory = true;
         info->num_memory_instructions++; /* we only care about stores */
         break;
      case nir_intrinsic_image_deref_store:
         info->writes_memory = true;
         info->num_memory_instructions++; /* we only care about stores */
         break;
      case nir_intrinsic_bindless_image_atomic_add:
      case nir_intrinsic_bindless_image_atomic_imin:
      case nir_intrinsic_bindless_image_atomic_umin:
      case nir_intrinsic_bindless_image_atomic_imax:
      case nir_intrinsic_bindless_image_atomic_umax:
      case nir_intrinsic_bindless_image_atomic_and:
      case nir_intrinsic_bindless_image_atomic_or:
      case nir_intrinsic_bindless_image_atomic_xor:
      case nir_intrinsic_bindless_image_atomic_exchange:
      case nir_intrinsic_bindless_image_atomic_comp_swap:
         info->uses_bindless_images = true;
         info->writes_memory = true;
         info->num_memory_instructions++; /* we only care about stores */
         break;
      case nir_intrinsic_image_deref_atomic_add:
      case nir_intrinsic_image_deref_atomic_imin:
      case nir_intrinsic_image_deref_atomic_umin:
      case nir_intrinsic_image_deref_atomic_imax:
      case nir_intrinsic_image_deref_atomic_umax:
      case nir_intrinsic_image_deref_atomic_and:
      case nir_intrinsic_image_deref_atomic_or:
      case nir_intrinsic_image_deref_atomic_xor:
      case nir_intrinsic_image_deref_atomic_exchange:
      case nir_intrinsic_image_deref_atomic_comp_swap:
      case nir_intrinsic_image_deref_atomic_inc_wrap:
      case nir_intrinsic_image_deref_atomic_dec_wrap:
         info->writes_memory = true;
         info->num_memory_instructions++; /* we only care about stores */
         break;
      case nir_intrinsic_store_ssbo:
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_ssbo_atomic_xor:
      case nir_intrinsic_ssbo_atomic_exchange:
      case nir_intrinsic_ssbo_atomic_comp_swap:
         info->writes_memory = true;
         info->num_memory_instructions++; /* we only care about stores */
         break;
      case nir_intrinsic_load_color0:
      case nir_intrinsic_load_color1: {
         unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
         uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa);
         info->colors_read |= mask << (index * 4);
         break;
      }
      case nir_intrinsic_load_barycentric_pixel:
      case nir_intrinsic_load_barycentric_centroid:
      case nir_intrinsic_load_barycentric_sample:
      case nir_intrinsic_load_barycentric_at_offset: /* uses center */
      case nir_intrinsic_load_barycentric_at_sample: { /* uses center */
         unsigned mode = nir_intrinsic_interp_mode(intr);

         if (mode == INTERP_MODE_FLAT)
            break;

         if (mode == INTERP_MODE_NOPERSPECTIVE) {
            if (intr->intrinsic == nir_intrinsic_load_barycentric_sample)
               info->uses_linear_sample = true;
            else if (intr->intrinsic == nir_intrinsic_load_barycentric_centroid)
               info->uses_linear_centroid = true;
            else
               info->uses_linear_center = true;

            if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
               info->uses_linear_opcode_interp_sample = true;
         } else {
            if (intr->intrinsic == nir_intrinsic_load_barycentric_sample)
               info->uses_persp_sample = true;
            else if (intr->intrinsic == nir_intrinsic_load_barycentric_centroid)
               info->uses_persp_centroid = true;
            else
               info->uses_persp_center = true;

            if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
               info->uses_persp_opcode_interp_sample = true;
         }
         break;
      }
      case nir_intrinsic_load_deref: {
         nir_variable *var = intrinsic_get_var(intr);
         nir_variable_mode mode = var->data.mode;

         if (mode == nir_var_shader_in) {
            /* PS inputs use the interpolated load intrinsics. */
            assert(nir->info.stage != MESA_SHADER_FRAGMENT);
            gather_intrinsic_load_deref_input_info(nir, intr,
                                                   nir_src_as_deref(intr->src[0]), info);
         } else if (mode == nir_var_shader_out) {
            gather_intrinsic_load_deref_output_info(nir, intr, var, info);
         }
         break;
      }
      case nir_intrinsic_store_deref: {
         nir_variable *var = intrinsic_get_var(intr);

         if (var->data.mode == nir_var_shader_out)
            gather_intrinsic_store_deref_output_info(nir, intr,
                                                     nir_src_as_deref(intr->src[0]), info);
         break;
      }
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
         unreachable("interp opcodes should have been lowered");
         break;
      default:
         break;
      }
   }
}

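/* Record a single vec4 output slot in si_shader_info: map the GL location
 * to a TGSI semantic (bumping the index for dual-source blending),
 * accumulate the per-component GS stream assignment (2 bits per
 * component), and set the writes_* flags implied by the semantic.
 * "component" is the first component written within the slot.
 */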
static void scan_output_slot(const nir_variable *var,
                             unsigned var_idx,
                             unsigned component, unsigned num_components,
                             struct si_shader_info *info)
{
   assert(component + num_components <= 4);
   assert(component < 4);

   unsigned semantic_name, semantic_index;

   unsigned location = var->data.location + var_idx;
   unsigned drv_location = var->data.driver_location + var_idx;

   if (info->processor == PIPE_SHADER_FRAGMENT) {
      tgsi_get_gl_frag_result_semantic(location,
                                       &semantic_name, &semantic_index);

      /* Adjust for dual source blending */
      if (var->data.index > 0) {
         semantic_index++;
      }
   } else {
      tgsi_get_gl_varying_semantic(location, true,
                                   &semantic_name, &semantic_index);
   }

   ubyte usagemask = ((1 << num_components) - 1) << component;

   unsigned gs_out_streams;
   if (var->data.stream & NIR_STREAM_PACKED) {
      gs_out_streams = var->data.stream & ~NIR_STREAM_PACKED;
   } else {
      assert(var->data.stream < 4);
      gs_out_streams = 0;
      for (unsigned j = 0; j < num_components; ++j)
         gs_out_streams |= var->data.stream << (2 * (component + j));
   }

   unsigned streamx = gs_out_streams & 3;
   unsigned streamy = (gs_out_streams >> 2) & 3;
   unsigned streamz = (gs_out_streams >> 4) & 3;
   unsigned streamw = (gs_out_streams >> 6) & 3;

   if (usagemask & TGSI_WRITEMASK_X) {
      info->output_streams[drv_location] |= streamx;
      info->num_stream_output_components[streamx]++;
   }
   if (usagemask & TGSI_WRITEMASK_Y) {
      info->output_streams[drv_location] |= streamy << 2;
      info->num_stream_output_components[streamy]++;
   }
   if (usagemask & TGSI_WRITEMASK_Z) {
      info->output_streams[drv_location] |= streamz << 4;
      info->num_stream_output_components[streamz]++;
   }
   if (usagemask & TGSI_WRITEMASK_W) {
      info->output_streams[drv_location] |= streamw << 6;
      info->num_stream_output_components[streamw]++;
   }

   info->output_semantic_name[drv_location] = semantic_name;
   info->output_semantic_index[drv_location] = semantic_index;

   switch (semantic_name) {
   case TGSI_SEMANTIC_PRIMID:
      info->writes_primid = true;
      break;
   case TGSI_SEMANTIC_VIEWPORT_INDEX:
      info->writes_viewport_index = true;
      break;
   case TGSI_SEMANTIC_LAYER:
      info->writes_layer = true;
      break;
   case TGSI_SEMANTIC_PSIZE:
      info->writes_psize = true;
      break;
   case TGSI_SEMANTIC_CLIPVERTEX:
      info->writes_clipvertex = true;
      break;
   case TGSI_SEMANTIC_COLOR:
      info->colors_written |= 1 << semantic_index;
      break;
   case TGSI_SEMANTIC_STENCIL:
      info->writes_stencil = true;
      break;
   case TGSI_SEMANTIC_SAMPLEMASK:
      info->writes_samplemask = true;
      break;
   case TGSI_SEMANTIC_EDGEFLAG:
      info->writes_edgeflag = true;
      break;
   case TGSI_SEMANTIC_POSITION:
      if (info->processor == PIPE_SHADER_FRAGMENT)
         info->writes_z = true;
      else
         info->writes_position = true;
      break;
   }
}

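/* Recursively flatten an output of arbitrary GLSL type (structs, arrays,
 * matrices, dual-slot 64-bit vectors) into the individual vec4 slots it
 * occupies, forwarding each slot to scan_output_slot(). Compact arrays
 * (e.g. gl_ClipDistance) are packed into at most two slots starting at
 * location_frac.
 */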
static void scan_output_helper(const nir_variable *var,
                               unsigned location,
                               const struct glsl_type *type,
                               struct si_shader_info *info)
{
   if (glsl_type_is_struct(type) || glsl_type_is_interface(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *ft = glsl_get_struct_field(type, i);
         scan_output_helper(var, location, ft, info);
         location += glsl_count_attribute_slots(ft, false);
      }
   } else if (glsl_type_is_array_or_matrix(type)) {
      const struct glsl_type *elem_type =
         glsl_get_array_element(type);
      unsigned num_elems = glsl_get_length(type);
      if (var->data.compact) {
         assert(glsl_type_is_scalar(elem_type));
         assert(glsl_get_bit_size(elem_type) == 32);
         unsigned component = var->data.location_frac;
         scan_output_slot(var, location, component,
                          MIN2(num_elems, 4 - component), info);
         if (component + num_elems > 4) {
            scan_output_slot(var, location + 1, 0,
                             component + num_elems - 4, info);
         }

      } else {
         unsigned elem_count = glsl_count_attribute_slots(elem_type, false);
         for (unsigned i = 0; i < num_elems; i++) {
            scan_output_helper(var, location, elem_type, info);
            location += elem_count;
         }
      }
   } else if (glsl_type_is_dual_slot(type)) {
      unsigned component = var->data.location_frac;
      scan_output_slot(var, location, component, 4 - component, info);
      scan_output_slot(var, location + 1, 0, component + 2 * glsl_get_components(type) - 4,
                       info);
   } else {
      unsigned component = var->data.location_frac;
      assert(glsl_type_is_vector_or_scalar(type));
      unsigned num_components = glsl_get_components(type);
      if (glsl_type_is_64bit(type))
         num_components *= 2;
      scan_output_slot(var, location, component, num_components, info);
   }
}

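/* Main scan entry point: fill si_shader_info from the NIR, roughly
 * mirroring what tgsi_scan_shader() produces for TGSI shaders.
 * Stage-level properties come from nir->info; per-instruction usage
 * flags are gathered by walking the entrypoint with scan_instruction().
 */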
void si_nir_scan_shader(const struct nir_shader *nir,
                        struct si_shader_info *info)
{
   nir_function *func;
   unsigned i;

   info->processor = pipe_shader_type_from_mesa(nir->info.stage);

   info->properties[TGSI_PROPERTY_NEXT_SHADER] =
      pipe_shader_type_from_mesa(nir->info.next_stage);

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] =
         nir->info.vs.window_space_position;
      info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] =
         nir->info.vs.blit_sgprs_amd;
   }

   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
         nir->info.tess.tcs_vertices_out;
   }

   if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      if (nir->info.tess.primitive_mode == GL_ISOLINES)
         info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES;
      else
         info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode;

      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);

      info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3;
      info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw;
      info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode;
   }

   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive;
      info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive;
      info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out;
      info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations;
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] =
         nir->info.fs.early_fragment_tests | nir->info.fs.post_depth_coverage;
      info->properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE] = nir->info.fs.post_depth_coverage;

      if (nir->info.fs.pixel_center_integer) {
         info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
            TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
      }

      if (nir->info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
         switch (nir->info.fs.depth_layout) {
         case FRAG_DEPTH_LAYOUT_ANY:
            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_ANY;
            break;
         case FRAG_DEPTH_LAYOUT_GREATER:
            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_GREATER;
            break;
         case FRAG_DEPTH_LAYOUT_LESS:
            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_LESS;
            break;
         case FRAG_DEPTH_LAYOUT_UNCHANGED:
            info->properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT] = TGSI_FS_DEPTH_LAYOUT_UNCHANGED;
            break;
         default:
            unreachable("Unknown depth layout");
         }
      }
   }

   if (gl_shader_stage_is_compute(nir->info.stage)) {
      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0];
      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1];
      info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2];
      info->properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD] = nir->info.cs.user_data_components_amd;
   }

   i = 0;
   uint64_t processed_inputs = 0;
   nir_foreach_variable(variable, &nir->inputs) {
      unsigned semantic_name, semantic_index;

      const struct glsl_type *type = variable->type;
      if (nir_is_per_vertex_io(variable, nir->info.stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      unsigned attrib_count = glsl_count_attribute_slots(type,
                                                         nir->info.stage == MESA_SHADER_VERTEX);

      i = variable->data.driver_location;

      /* Vertex shader inputs don't have semantics. The state
       * tracker has already mapped them to attributes via
       * variable->data.driver_location.
       */
      if (nir->info.stage == MESA_SHADER_VERTEX)
         continue;

      for (unsigned j = 0; j < attrib_count; j++, i++) {

         if (processed_inputs & ((uint64_t)1 << i))
            continue;

         processed_inputs |= ((uint64_t)1 << i);

         tgsi_get_gl_varying_semantic(variable->data.location + j, true,
                                      &semantic_name, &semantic_index);

         info->input_semantic_name[i] = semantic_name;
         info->input_semantic_index[i] = semantic_index;

         if (semantic_name == TGSI_SEMANTIC_PRIMID)
            info->uses_primid = true;

         if (semantic_name == TGSI_SEMANTIC_COLOR) {
            /* We only need this for color inputs. */
            if (variable->data.sample)
               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
            else if (variable->data.centroid)
               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
            else
               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
         }

         enum glsl_base_type base_type =
            glsl_get_base_type(glsl_without_array(variable->type));

         switch (variable->data.interpolation) {
         case INTERP_MODE_NONE:
            if (glsl_base_type_is_integer(base_type)) {
               info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
               break;
            }

            if (semantic_name == TGSI_SEMANTIC_COLOR) {
               info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
               break;
            }
            /* fall-through */

         case INTERP_MODE_SMOOTH:
            assert(!glsl_base_type_is_integer(base_type));

            info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;

         case INTERP_MODE_NOPERSPECTIVE:
            assert(!glsl_base_type_is_integer(base_type));

            info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
            break;

         case INTERP_MODE_FLAT:
            info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
            break;
         }
      }
   }

   nir_foreach_variable(variable, &nir->outputs) {
      const struct glsl_type *type = variable->type;
      if (nir_is_per_vertex_io(variable, nir->info.stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      ASSERTED unsigned attrib_count = glsl_count_attribute_slots(type, false);
      scan_output_helper(variable, 0, type, info);

      unsigned loc = variable->data.location;
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          loc == FRAG_RESULT_COLOR &&
          nir->info.outputs_written & (1ull << loc)) {
         assert(attrib_count == 1);
         info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true;
      }
   }

   info->num_inputs = nir->num_inputs;
   info->num_outputs = nir->num_outputs;

   info->constbuf0_num_slots = nir->num_uniforms;
   info->shader_buffers_declared = u_bit_consecutive(0, nir->info.num_ssbos);
   info->const_buffers_declared = u_bit_consecutive(1, nir->info.num_ubos);
   if (nir->num_uniforms > 0)
      info->const_buffers_declared |= 1;
   info->images_declared = u_bit_consecutive(0, nir->info.num_images);
   info->msaa_images_declared = u_bit_consecutive(0, nir->info.last_msaa_image + 1);
   info->samplers_declared = nir->info.textures_used;

   info->num_written_clipdistance = nir->info.clip_distance_array_size;
   info->num_written_culldistance = nir->info.cull_distance_array_size;
   info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
   info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);

   if (info->processor == PIPE_SHADER_FRAGMENT)
      info->uses_kill = nir->info.fs.uses_discard;

   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      info->tessfactors_are_def_in_all_invocs =
         ac_are_tessfactors_def_in_all_invocs(nir);
   }

   func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block)
         scan_instruction(nir, info, instr);
   }
}

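/* Generic NIR optimization loop, repeated until no pass reports
 * progress. flrp lowering is guarded by info.flrp_lowered: nothing
 * should rematerialize flrps once they are lowered, so one lowering
 * pass per shader is enough.
 */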
static void
si_nir_opts(struct nir_shader *nir)
{
   bool progress;

   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);

      NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS_V(nir, nir_lower_phis_to_scalar);

      /* (Constant) copy propagation is needed for txf with offsets. */
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, true);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      /* Needed for algebraic lowering */
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      if (!nir->info.flrp_lowered) {
         unsigned lower_flrp =
            (nir->options->lower_flrp16 ? 16 : 0) |
            (nir->options->lower_flrp32 ? 32 : 0) |
            (nir->options->lower_flrp64 ? 64 : 0);
         assert(lower_flrp);
         bool lower_flrp_progress = false;

         NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
                  lower_flrp,
                  false /* always_precise */,
                  nir->options->lower_ffma);
         if (lower_flrp_progress) {
            NIR_PASS(progress, nir,
                     nir_opt_constant_folding);
            progress = true;
         }

         /* Nothing should rematerialize any flrps, so we only
          * need to do this lowering once.
          */
         nir->info.flrp_lowered = true;
      }

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
      }
   } while (progress);
}

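/* Type-size callback for nir_lower_io: every attribute slot is one vec4;
 * the bindless flag does not matter here.
 */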
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

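/* Rewrite loads of the COL0/COL1 fragment inputs into the dedicated
 * load_color0/load_color1 intrinsics. Colors are handled separately from
 * other PS inputs because their interpolation depends on rasterizer
 * state (e.g. flat shading and two-sided color selection), which the
 * backend resolves later.
 */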
static void
si_nir_lower_color(nir_shader *nir)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, entrypoint);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin =
            nir_instr_as_intrinsic(instr);

         if (intrin->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         b.cursor = nir_before_instr(instr);
         nir_variable *var = nir_deref_instr_get_variable(deref);
         nir_ssa_def *def;

         if (var->data.location == VARYING_SLOT_COL0) {
            def = nir_load_color0(&b);
         } else if (var->data.location == VARYING_SLOT_COL1) {
            def = nir_load_color1(&b);
         } else {
            continue;
         }

         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def));
         nir_instr_remove(instr);
      }
   }
}

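/* One-time lowering of FS inputs: move them to temporaries, clean up the
 * resulting copies, lower color inputs, then lower input derefs to
 * offset-based load intrinsics with constant offsets folded into the
 * base (the last pass relies on constant folding having run first).
 */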
static void si_nir_lower_ps_inputs(struct nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_FRAGMENT)
      return;

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), false, true);

   /* Since we're doing nir_lower_io_to_temporaries late, we need
    * to lower all the copy_deref's introduced by
    * lower_io_to_temporaries before calling nir_lower_io.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   si_nir_lower_color(nir);
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
              nir_var_shader_in);
}

void si_nir_adjust_driver_locations(struct nir_shader *nir)
{
   /* Adjust the driver location of inputs and outputs. The state tracker
    * interprets them as slots, while the ac/nir backend interprets them
    * as individual components.
    */
   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
      nir_foreach_variable(variable, &nir->inputs)
         variable->data.driver_location *= 4;
   }

   nir_foreach_variable(variable, &nir->outputs)
      variable->data.driver_location *= 4;
}

/**
 * Perform "lowering" operations on the NIR that are run once when the shader
 * selector is created.
 */
static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
{
   /* Perform lowerings (and optimizations) of code.
    *
    * Performance considerations aside, we must:
    * - lower certain ALU operations
    * - ensure constant offsets for texture instructions are folded
    *   and copy-propagated
    */

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   const nir_lower_subgroups_options subgroups_options = {
      .subgroup_size = 64,
      .ballot_bit_size = 64,
      .lower_to_scalar = true,
      .lower_subgroup_masks = true,
      .lower_vote_trivial = false,
      .lower_vote_eq_to_ballot = true,
   };
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);

   /* Lower load constants to scalar and then clean up the mess */
   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_pack);
   NIR_PASS_V(nir, nir_opt_access);
   si_nir_opts(nir);

   /* Lower large variables that are always constant with load_constant
    * intrinsics, which get turned into PC-relative loads from a data
    * section next to the shader.
    *
    * st/mesa calls finalize_nir twice, but we can't call this pass twice.
    */
   bool changed = false;
   if (!nir->constant_data) {
      NIR_PASS(changed, nir, nir_opt_large_constants,
               glsl_get_natural_size_align_bytes, 16);
   }

   changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
   if (changed)
      si_nir_opts(nir);

   NIR_PASS_V(nir, nir_lower_bool_to_int32);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);

   if (sscreen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL))
      NIR_PASS_V(nir, nir_lower_discard_to_demote);
}

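/* Implements pipe_screen::finalize_nir. st/mesa calls this before the
 * shader is handed to the driver; it gathers shader_info and runs the
 * one-time lowerings above. It may be called more than once on the same
 * shader (see the nir_opt_large_constants guard in si_lower_nir).
 */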
void si_finalize_nir(struct pipe_screen *screen, void *nirptr, bool optimize)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct nir_shader *nir = (struct nir_shader *)nirptr;

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   si_nir_lower_ps_inputs(nir);
   si_lower_nir(sscreen, nir);
}