src/gallium/drivers/r600/sfn/sfn_nir.cpp
/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "sfn_instruction_tex.h"

#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"

#include <vector>

namespace r600 {

using std::vector;

ShaderFromNir::ShaderFromNir():sh(nullptr),
   m_current_if_id(0),
   m_current_loop_id(0)
{
}

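/* Translate one NIR shader into the r600 shader IR: pick the stage-specific
 * implementation, scan declarations and instructions, reserve and allocate
 * registers, then walk the control-flow graph and emit the IR. */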
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
                          r600_pipe_shader_selector *sel, r600_shader_key& key,
                          struct r600_shader* gs_shader, enum chip_class _chip_class)
{
   sh = shader;
   chip_class = _chip_class;
   assert(sh);

   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_TESS_CTRL:
      sfn_log << SfnLog::trans << "Start TCS\n";
      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_TESS_EVAL:
      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_GEOMETRY:
      sfn_log << SfnLog::trans << "Start GS\n";
      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_FRAGMENT:
      sfn_log << SfnLog::trans << "Start FS\n";
      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_COMPUTE:
      sfn_log << SfnLog::trans << "Start CS\n";
      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::trans << "Process declarations\n";
   if (!process_declaration())
      return false;

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));

   sfn_log << SfnLog::trans << "Scan shader\n";
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!impl->scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   sfn_log << SfnLog::trans << "Reserve registers\n";
   if (!impl->allocate_reserved_registers()) {
      return false;
   }

   ValuePool::array_list arrays;
   sfn_log << SfnLog::trans << "Allocate local registers\n";
   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
      impl->allocate_local_register(*reg, arrays);
   }

   sfn_log << SfnLog::trans << "Emit shader start\n";
   impl->allocate_arrays(arrays);

   impl->emit_shader_start();

   sfn_log << SfnLog::trans << "Process shader\n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   // Add optimizations here
   sfn_log << SfnLog::trans << "Finalize\n";
   impl->finalize();

   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
      sfn_log << SfnLog::trans << "Merge registers\n";
      impl->remap_registers();
   }
   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
   return true;
}


Shader ShaderFromNir::shader() const
{
   return Shader{impl->m_output, impl->get_temp_registers()};
}


bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");
   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}

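/* Each if statement gets a fresh id; the ids of currently open ifs are kept
 * on a stack while their then/else branches are processed, and the same id
 * is passed to the matching else/endif emission. */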
bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!impl->emit_if_start(m_current_if_id, if_stmt))
      return false;

   int if_id = m_current_if_id++;
   m_if_stack.push(if_id);

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
      if (!process_cf_node(n)) return false;

   if (!if_stmt->then_list.is_empty()) {
      if (!impl->emit_else_start(if_id))
         return false;

      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!impl->emit_ifelse_end(if_id))
      return false;

   m_if_stack.pop();
   return true;
}

bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   int loop_id = m_current_loop_id++;

   if (!impl->emit_loop_start(loop_id))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
      if (!process_cf_node(n)) return false;

   if (!impl->emit_loop_end(loop_id))
      return false;

   return true;
}

bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
   nir_foreach_instr(instr, block) {
      int r = emit_instruction(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}


ShaderFromNir::~ShaderFromNir()
{
}

pipe_shader_type ShaderFromNir::processor_type() const
{
   return impl->m_processor_type;
}


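/* Dispatch a single NIR instruction to the stage-specific emitter. */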
bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   assert(impl);

   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return impl->set_literal_constant(nir_instr_as_load_const(instr));
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   default:
      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
      nir_print_instr(instr, stderr);
      fprintf(stderr, "'\n");
      return false;
   }
}

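/* Walk the shader's input, output, and uniform variable lists and let the
 * stage-specific implementation record what it needs for each of them. */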
bool ShaderFromNir::process_declaration()
{
   // scan input declarations
   nir_foreach_variable(variable, &sh->inputs) {
      if (!impl->process_inputs(variable)) {
         fprintf(stderr, "R600: error parsing input variable %s\n", variable->name);
         return false;
      }
   }

   // scan output declarations
   nir_foreach_variable(variable, &sh->outputs) {
      if (!impl->process_outputs(variable)) {
         fprintf(stderr, "R600: error parsing output variable %s\n", variable->name);
         return false;
      }
   }

   // scan uniform declarations
   nir_foreach_variable(variable, &sh->uniforms) {
      if (!impl->process_uniforms(variable)) {
         fprintf(stderr, "R600: error parsing uniform variable %s\n", variable->name);
         return false;
      }
   }

   return true;
}

const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
   assert(impl);
   return impl->m_output;
}


AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
{
   return do_lower(ir);
}

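/* Lower (un)pack_half_2x16 to the split variants the backend implements.
 * Roughly, for the unpack case:
 *
 *    vec2 r = unpack_half_2x16(p)
 * becomes
 *    vec2 r = vec2(unpack_half_2x16_split_x(p), unpack_half_2x16_split_y(p))
 */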
static nir_ssa_def *
r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   switch (alu->op) {
   case nir_op_unpack_half_2x16: {
      nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
      return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
                         nir_unpack_half_2x16_split_y(b, packed));

   }
   case nir_op_pack_half_2x16: {
      nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
      return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
                                         nir_channel(b, src_vec2, 1));
   }
   default:
      return nullptr;
   }
}

bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
{
   return instr->type == nir_instr_type_alu;
}

bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_pack_unpack_2x16_filter,
                                        r600_nir_lower_pack_unpack_2x16_impl,
                                        nullptr);
}

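/* Rewrite the address operand of load/store_scratch: the byte address is
 * shifted right by an amount derived from the access width, which is the
 * form the backend code expects for scratch accesses here. */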
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->dest.ssa.num_components;
   }

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

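/* Replace load_ubo with the r600-specific vec4-aligned load_ubo_r600.
 * With a constant offset the backend can fetch exactly the requested
 * components; otherwise a full vec4 is loaded from offset >> 4 and the
 * wanted components are selected afterwards with bcsel.  Sketch for a
 * single-component load with a non-constant offset (names illustrative):
 *
 *    vec1 x = load_ubo(block, offset)
 * becomes
 *    vec4 v = load_ubo_r600(block, offset >> 4)
 *    vec1 x = bcsel(component_of(offset), v.x / v.y / v.z / v.w)
 */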
static nir_ssa_def *
r600_lower_ubo_to_align16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   assert(op->intrinsic == nir_intrinsic_load_ubo);

   bool const_address = (nir_src_is_const(op->src[1]) && nir_src_is_const(op->src[0]));

   nir_ssa_def *offset = op->src[1].ssa;

   /* This is ugly: With const addressing we can actually set a proper fetch target mask,
    * but for this we need the component encoded, so we don't shift and do the decoding in
    * the backend. Otherwise we shift by four and resolve the component here.
    * (TODO: encode the start component in the intrinsic when the offset base is non-constant
    * but a multiple of 16) */

   nir_ssa_def *new_offset = offset;
   if (!const_address)
      new_offset = nir_ishr(b, offset, nir_imm_int(b, 4));

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_r600);
   load->num_components = const_address ? op->num_components : 4;
   load->src[0] = op->src[0];
   load->src[1] = nir_src_for_ssa(new_offset);
   nir_intrinsic_set_align(load, nir_intrinsic_align_mul(op), nir_intrinsic_align_offset(op));

   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   /* when four components are loaded or both the offset and the location
    * are constant, then the backend can deal with it better */
   if (op->num_components == 4 || const_address)
      return &load->dest.ssa;

   /* What comes below is a performance disaster when the offset is not constant
    * because then we have to assume that any component can be the first one and we
    * have to pick the result manually. */
   nir_ssa_def *first_comp = nir_iand(b, nir_ishr(b, offset, nir_imm_int(b, 2)),
                                      nir_imm_int(b, 3));

   const unsigned swz_000[3] = {0, 0, 0};
   nir_ssa_def *component_select = nir_ieq(b, r600_imm_ivec3(b, 0, 1, 2),
                                           nir_swizzle(b, first_comp, swz_000, 3));

   if (op->num_components == 1) {
      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_channel(b, &load->dest.ssa, 0),
                                      nir_channel(b, &load->dest.ssa, 3));
      nir_ssa_def *check1 = nir_bcsel(b, nir_channel(b, component_select, 1),
                                      nir_channel(b, &load->dest.ssa, 1),
                                      check0);
      return nir_bcsel(b, nir_channel(b, component_select, 2),
                       nir_channel(b, &load->dest.ssa, 2),
                       check1);
   } else if (op->num_components == 2) {
      const unsigned szw_01[2] = {0, 1};
      const unsigned szw_12[2] = {1, 2};
      const unsigned szw_23[2] = {2, 3};

      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_swizzle(b, &load->dest.ssa, szw_01, 2),
                                      nir_swizzle(b, &load->dest.ssa, szw_23, 2));
      return nir_bcsel(b, nir_channel(b, component_select, 1),
                       nir_swizzle(b, &load->dest.ssa, szw_12, 2),
                       check0);
   } else {
      const unsigned szw_012[3] = {0, 1, 2};
      const unsigned szw_123[3] = {1, 2, 3};
      return nir_bcsel(b, nir_channel(b, component_select, 0),
                       nir_swizzle(b, &load->dest.ssa, szw_012, 3),
                       nir_swizzle(b, &load->dest.ssa, szw_123, 3));
   }
}

bool r600_lower_ubo_to_align16_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   return op->intrinsic == nir_intrinsic_load_ubo;
}


bool r600_lower_ubo_to_align16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_ubo_to_align16_filter,
                                        r600_lower_ubo_to_align16_impl,
                                        nullptr);
}

}

using r600::r600_nir_lower_int_tg4;
using r600::r600_nir_lower_pack_unpack_2x16;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

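/* Size/align callback for nir_lower_vars_to_scratch: non-array types occupy
 * one slot and arrays take one slot per element, so in effect the values
 * returned here count whole slots rather than bytes. */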
void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

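/* Lower load/store_shared to the r600 LDS intrinsics.  Loads get one address
 * per requested channel (built as a vector below), while stores are split
 * into at most two stores covering up to two channels each, according to the
 * write mask. */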
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}

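/* One round of the generic NIR cleanup passes; returns true if any pass made
 * progress so the callers below can iterate to a fixed point. */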
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_vectorize);

   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

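/* Check whether any ALU instruction uses the saturate output modifier; the
 * optimization loop below is skipped in that case. */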
bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}

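/* Driver entry point: run the NIR lowering and optimization pipeline on the
 * selected shader, translate it to the r600 IR with ShaderFromNir, and then
 * lower that IR to bytecode with AssemblyFromShaderLegacy.  Returns 0 on
 * success and a negative value on failure. */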
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   char filename[4000];
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   r600::ShaderFromNir convert;

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR------------------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);

   NIR_PASS_V(sel->nir, r600_lower_shared_io);

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
   };
   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);

   NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
      NIR_PASS_V(sel->nir, r600_lower_tess_io, (pipe_prim_type)key->tcs.prim_mode);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sel->nir->info.stage == MESA_SHADER_TESS_CTRL ?
                          key->tcs.prim_mode : sel->nir->info.tess.primitive_mode;
      NIR_PASS_V(sel->nir, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }


   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sel->nir, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);


   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
   bool optimize = func->impl->registers.length() == 0 && !has_saturate(func);

   if (optimize) {
      optimize_once(sel->nir);
      NIR_PASS_V(sel->nir, r600_lower_ubo_to_align16);
   }
   /* It seems the output of this optimization is cached somewhere, and
    * when there are registers, then we can no longer copy propagate, so
    * skip the optimization then. (There is probably a better way, but yeah)
    */
   if (optimize)
      while(optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_out, NULL);


   NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize && optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
   //NIR_PASS_V(sel->nir, nir_opt_algebraic);
   //NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods);
   NIR_PASS_V(sel->nir, nir_convert_from_ssa, true);
   NIR_PASS_V(sel->nir, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sel->nir->scratch_size;

   if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       sel->nir->info.stage == MESA_SHADER_VERTEX ||
       sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
                                           << sel->nir->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size +
                                               sel->nir->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader* gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   bool r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
      static int shnr = 0;

      snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);

      if (access(filename, F_OK) == -1) {
         FILE *f = fopen(filename, "w");

         if (f) {
            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name);
            nir_print_shader(sel->nir, f);
            fprintf(f, ")\";\n");
            fclose(f);
         }
      }
      if (!r)
         return -2;
   }

   auto shader = convert.shader();

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
   if (!afs.lower(shader.m_ir)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
      return -1;
   }

   if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }

   return 0;
}