src/gallium/drivers/r600/sfn/sfn_nir.cpp

   1 /* -*- mesa-c++  -*-
   2  *
   3  * Copyright (c) 2019 Collabora LTD
   4  *
   5  * Author: Gert Wollny <gert.wollny@collabora.com>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * on the rights to use, copy, modify, merge, publish, distribute, sub
  11  * license, and/or sell copies of the Software, and to permit persons to whom
  12  * the Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27 #include "sfn_nir.h"
  28 #include "nir_builder.h"
  29
  30 #include "../r600_pipe.h"
  31 #include "../r600_shader.h"
  32
  33 #include "sfn_instruction_tex.h"
  34
  35 #include "sfn_shader_vertex.h"
  36 #include "sfn_shader_fragment.h"
  37 #include "sfn_shader_geometry.h"
  38 #include "sfn_shader_compute.h"
  39 #include "sfn_shader_tcs.h"
  40 #include "sfn_shader_tess_eval.h"
  41 #include "sfn_nir_lower_fs_out_to_vector.h"
  42 #include "sfn_ir_to_assembly.h"
  43
  44 #include <vector>
  45
  46 namespace r600 {
  47
  48 using std::vector;
  49
  50 ShaderFromNir::ShaderFromNir():sh(nullptr),
  51    m_current_if_id(0),
  52    m_current_loop_id(0)
  53 {
  54 }
  55
  56 bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
  57                           r600_pipe_shader_selector *sel, r600_shader_key& key,
  58                           struct r600_shader* gs_shader, enum chip_class _chip_class)
  59 {
  60    sh = shader;
  61    chip_class = _chip_class;
  62    assert(sh);
  63
  64    switch (shader->info.stage) {
  65    case MESA_SHADER_VERTEX:
  66       impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
  67       break;
  68    case MESA_SHADER_TESS_CTRL:
  69       sfn_log << SfnLog::trans << "Start TCS\n";
  70       impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
  71       break;
  72    case MESA_SHADER_TESS_EVAL:
  73       sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
  74       impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
  75       break;
  76    case MESA_SHADER_GEOMETRY:
  77       sfn_log << SfnLog::trans << "Start GS\n";
  78       impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
  79       break;
  80    case MESA_SHADER_FRAGMENT:
  81       sfn_log << SfnLog::trans << "Start FS\n";
  82       impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
  83       break;
  84    case MESA_SHADER_COMPUTE:
  85       sfn_log << SfnLog::trans << "Start CS\n";
  86       impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
  87       break;
  88    default:
  89       return false;
  90    }
  91
  92    sfn_log << SfnLog::trans << "Process declarations\n";
  93    if (!process_declaration())
  94       return false;
  95
  96    // at this point all functions should be inlined
  97    const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
  98
  99    sfn_log << SfnLog::trans << "Scan shader\n";
 100    nir_foreach_block(block, func->impl) {
 101       nir_foreach_instr(instr, block) {
 102          if (!impl->scan_instruction(instr)) {
 103             fprintf(stderr, "Unhandled sysvalue access ");
 104             nir_print_instr(instr, stderr);
 105             fprintf(stderr, "\n");
 106             return false;
 107          }
 108       }
 109    }
 110
 111    sfn_log << SfnLog::trans << "Reserve registers\n";
 112    if (!impl->allocate_reserved_registers()) {
 113       return false;
 114    }
 115
 116    ValuePool::array_list arrays;
 117    sfn_log << SfnLog::trans << "Allocate local registers\n";
 118    foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
 119       impl->allocate_local_register(*reg, arrays);
 120    }
 121
 122    sfn_log << SfnLog::trans << "Emit shader start\n";
 123    impl->allocate_arrays(arrays);
 124
 125    impl->emit_shader_start();
 126
 127    sfn_log << SfnLog::trans << "Process shader \n";
 128    foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
 129       if (!process_cf_node(node))
 130          return false;
 131    }
 132
 133    // Add optimizations here
 134    sfn_log << SfnLog::trans << "Finalize\n";
 135    impl->finalize();
 136
 137    if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
 138       sfn_log << SfnLog::trans << "Merge registers\n";
 139       impl->remap_registers();
 140    }
 141    sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
 142    return true;
 143 }
 144
 145 Shader ShaderFromNir::shader() const
 146 {
 147    return Shader{impl->m_output, impl->get_temp_registers()};
 148 }
 149
 150
 151 bool ShaderFromNir::process_cf_node(nir_cf_node *node)
 152 {
 153    SFN_TRACE_FUNC(SfnLog::flow, "CF");
 154    switch (node->type) {
 155    case nir_cf_node_block:
 156       return process_block(nir_cf_node_as_block(node));
 157    case nir_cf_node_if:
 158       return process_if(nir_cf_node_as_if(node));
 159    case nir_cf_node_loop:
 160       return process_loop(nir_cf_node_as_loop(node));
 161    default:
 162       return false;
 163    }
 164 }
 165
 166 bool ShaderFromNir::process_if(nir_if *if_stmt)
 167 {
 168    SFN_TRACE_FUNC(SfnLog::flow, "IF");
 169
 170    if (!impl->emit_if_start(m_current_if_id, if_stmt))
 171       return false;
 172
 173    int if_id = m_current_if_id++;
 174    m_if_stack.push(if_id);
 175
 176    foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
 177          if (!process_cf_node(n)) return false;
 178
 179    if (!if_stmt->then_list.is_empty()) {
 180       if (!impl->emit_else_start(if_id))
 181          return false;
 182
 183       foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
 184             if (!process_cf_node(n)) return false;
 185    }
 186
 187    if (!impl->emit_ifelse_end(if_id))
 188       return false;
 189
 190    m_if_stack.pop();
 191    return true;
 192 }
 193
 194 bool ShaderFromNir::process_loop(nir_loop *node)
 195 {
 196    SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
 197    int loop_id = m_current_loop_id++;
 198
 199    if (!impl->emit_loop_start(loop_id))
 200       return false;
 201
 202    foreach_list_typed(nir_cf_node, n, node, &node->body)
 203          if (!process_cf_node(n)) return false;
 204
 205    if (!impl->emit_loop_end(loop_id))
 206       return false;
 207
 208    return true;
 209 }
 210
 211 bool ShaderFromNir::process_block(nir_block *block)
 212 {
 213    SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
 214    nir_foreach_instr(instr, block) {
 215       int r = emit_instruction(instr);
 216       if (!r) {
 217          sfn_log << SfnLog::err << "R600: Unsupported instruction: "
 218                  << *instr << "\n";
 219          return false;
 220       }
 221    }
 222    return true;
 223 }
 224
 225
 226 ShaderFromNir::~ShaderFromNir()
 227 {
 228 }
 229
 230 pipe_shader_type ShaderFromNir::processor_type() const
 231 {
 232    return impl->m_processor_type;
 233 }
 234
 235
 236 bool ShaderFromNir::emit_instruction(nir_instr *instr)
 237 {
 238    assert(impl);
 239
 240    sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
 241
 242    switch (instr->type) {
 243    case nir_instr_type_alu:
 244       return impl->emit_alu_instruction(instr);
 245    case nir_instr_type_deref:
 246       return impl->emit_deref_instruction(nir_instr_as_deref(instr));
 247    case nir_instr_type_intrinsic:
 248       return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
 249    case nir_instr_type_load_const:
 250       return impl->set_literal_constant(nir_instr_as_load_const(instr));
 251    case nir_instr_type_tex:
 252       return impl->emit_tex_instruction(instr);
 253    case nir_instr_type_jump:
 254       return impl->emit_jump_instruction(nir_instr_as_jump(instr));
 255    default:
 256       fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
 257       nir_print_instr(instr, stderr);
 258       fprintf(stderr, "'\n");
 259       return false;
 260    case nir_instr_type_ssa_undef:
 261       return impl->create_undef(nir_instr_as_ssa_undef(instr));
 262       return true;
 263    }
 264 }
 265
 266 bool ShaderFromNir::process_declaration()
 267 {
 268    // scan declarations
 269    nir_foreach_shader_in_variable(variable, sh) {
 270       if (!impl->process_inputs(variable)) {
 271          fprintf(stderr, "R600: error parsing input varible %s\n", variable->name);
 272          return false;
 273       }
 274    }
 275
 276    // scan declarations
 277    nir_foreach_shader_out_variable(variable, sh) {
 278       if (!impl->process_outputs(variable)) {
 279          fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
 280          return false;
 281       }
 282    }
 283
 284    // scan declarations
 285    nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
 286                                                  nir_var_mem_ubo |
 287                                                  nir_var_mem_ssbo) {
 288       if (!impl->process_uniforms(variable)) {
 289          fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
 290          return false;
 291       }
 292    }
 293
 294    return true;
 295 }
 296
 297 const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
 298 {
 299    assert(impl);
 300    return impl->m_output;
 301 }
 302
 303
 304 AssemblyFromShader::~AssemblyFromShader()
 305 {
 306 }
 307
 308 bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
 309 {
 310    return do_lower(ir);
 311 }
 312
 313 static nir_ssa_def *
 314 r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
 315 {
 316    nir_alu_instr *alu = nir_instr_as_alu(instr);
 317
 318    switch (alu->op) {
 319    case nir_op_unpack_half_2x16: {
 320       nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
 321       return  nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
 322                        nir_unpack_half_2x16_split_y(b, packed));
 323
 324    }
 325    case nir_op_pack_half_2x16: {
 326       nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
 327       return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
 328                                       nir_channel(b, src_vec2, 1));
 329    }
 330    default:
 331       return nullptr;
 332    }
 333 }
 334
 335 bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
 336 {
 337    return instr->type == nir_instr_type_alu;
 338 }
 339
 340 bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
 341 {
 342    return nir_shader_lower_instructions(shader,
 343                                         r600_nir_lower_pack_unpack_2x16_filter,
 344                                         r600_nir_lower_pack_unpack_2x16_impl,
 345                                         nullptr);
 346 };
 347
 348 static void
 349 r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
 350 {
 351    b->cursor = nir_before_instr(&instr->instr);
 352
 353    int address_index = 0;
 354    int align;
 355
 356    if (instr->intrinsic == nir_intrinsic_store_scratch) {
 357       align  = instr->src[0].ssa->num_components;
 358       address_index = 1;
 359    } else{
 360       align = instr->dest.ssa.num_components;
 361    }
 362
 363    nir_ssa_def *address = instr->src[address_index].ssa;
 364    nir_ssa_def *new_address = nir_ishr(b, address,  nir_imm_int(b, 4 * align));
 365
 366    nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
 367                          nir_src_for_ssa(new_address));
 368 }
 369
 370 bool r600_lower_scratch_addresses(nir_shader *shader)
 371 {
 372    bool progress = false;
 373    nir_foreach_function(function, shader) {
 374       nir_builder build;
 375       nir_builder_init(&build, function->impl);
 376
 377       nir_foreach_block(block, function->impl) {
 378          nir_foreach_instr(instr, block) {
 379             if (instr->type != nir_instr_type_intrinsic)
 380                continue;
 381             nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
 382             if (op->intrinsic != nir_intrinsic_load_scratch &&
 383                 op->intrinsic != nir_intrinsic_store_scratch)
 384                continue;
 385             r600_nir_lower_scratch_address_impl(&build, op);
 386             progress = true;
 387          }
 388       }
 389    }
 390    return progress;
 391 }
 392
 393 static nir_ssa_def *
 394 r600_lower_ubo_to_align16_impl(nir_builder *b, nir_instr *instr, void *_options)
 395 {
 396    b->cursor = nir_before_instr(instr);
 397
 398    nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
 399    assert(op->intrinsic == nir_intrinsic_load_ubo);
 400
 401    bool const_address = (nir_src_is_const(op->src[1]) && nir_src_is_const(op->src[0]));
 402
 403    nir_ssa_def *offset = op->src[1].ssa;
 404
 405    /* This is ugly: With const addressing we can actually set a proper fetch target mask,
 406     * but for this we need the component encoded, we don't shift and do de decoding in the
 407     * backend. Otherwise we shift by four and resolve the component here
 408     * (TODO: encode the start component in the intrinsic when the offset base is non-constant
 409     * but a multiple of 16 */
 410
 411    nir_ssa_def *new_offset = offset;
 412    if (!const_address)
 413       new_offset = nir_ishr(b, offset,  nir_imm_int(b, 4));
 414
 415    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_r600);
 416    load->num_components = const_address ? op->num_components : 4;
 417    load->src[0] = op->src[0];
 418    load->src[1] = nir_src_for_ssa(new_offset);
 419    nir_intrinsic_set_align(load, nir_intrinsic_align_mul(op), nir_intrinsic_align_offset(op));
 420
 421    nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
 422    nir_builder_instr_insert(b, &load->instr);
 423
 424    /* when four components are loaded or both the offset and the location
 425     * are constant, then the backend can deal with it better */
 426    if (op->num_components == 4 || const_address)
 427       return &load->dest.ssa;
 428
 429    /* What comes below is a performance disaster when the offset is not constant
 430     * because then we have to assume that any component can be the first one and we
 431     * have to pick the result manually. */
 432    nir_ssa_def *first_comp = nir_iand(b, nir_ishr(b, offset,  nir_imm_int(b, 2)),
 433                                       nir_imm_int(b,3));
 434
 435    const unsigned swz_000[4] = {0, 0, 0};
 436    nir_ssa_def *component_select = nir_ieq(b, r600_imm_ivec3(b, 0, 1, 2),
 437                                            nir_swizzle(b, first_comp, swz_000, 3));
 438
 439    if (op->num_components == 1) {
 440       nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
 441                                       nir_channel(b, &load->dest.ssa, 0),
 442                                       nir_channel(b, &load->dest.ssa, 3));
 443       nir_ssa_def *check1 = nir_bcsel(b, nir_channel(b, component_select, 1),
 444                                       nir_channel(b, &load->dest.ssa, 1),
 445                                       check0);
 446       return nir_bcsel(b, nir_channel(b, component_select, 2),
 447                        nir_channel(b, &load->dest.ssa, 2),
 448                        check1);
 449    } else if (op->num_components == 2) {
 450       const unsigned szw_01[2] = {0, 1};
 451       const unsigned szw_12[2] = {1, 2};
 452       const unsigned szw_23[2] = {2, 3};
 453
 454       nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
 455                                       nir_swizzle(b, &load->dest.ssa, szw_01, 2),
 456                                       nir_swizzle(b, &load->dest.ssa, szw_23, 2));
 457       return nir_bcsel(b, nir_channel(b, component_select, 1),
 458                                       nir_swizzle(b, &load->dest.ssa, szw_12, 2),
 459                                       check0);
 460    } else {
 461       const unsigned szw_012[3] = {0, 1, 2};
 462       const unsigned szw_123[3] = {1, 2, 3};
 463       return nir_bcsel(b, nir_channel(b, component_select, 0),
 464                        nir_swizzle(b, &load->dest.ssa, szw_012, 3),
 465                        nir_swizzle(b, &load->dest.ssa, szw_123, 3));
 466    }
 467 }
 468
 469 bool r600_lower_ubo_to_align16_filter(const nir_instr *instr, const void *_options)
 470 {
 471    if (instr->type != nir_instr_type_intrinsic)
 472       return false;
 473
 474    nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
 475    return op->intrinsic == nir_intrinsic_load_ubo;
 476 }
 477
 478
 479 bool r600_lower_ubo_to_align16(nir_shader *shader)
 480 {
 481    return nir_shader_lower_instructions(shader,
 482                                         r600_lower_ubo_to_align16_filter,
 483                                         r600_lower_ubo_to_align16_impl,
 484                                         nullptr);
 485 }
 486
 487 static void
 488 insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
 489 {
 490    nir_foreach_variable_in_list(var, var_list) {
 491       if (var->data.binding > new_var->data.binding ||
 492           (var->data.binding == new_var->data.binding &&
 493            var->data.offset > new_var->data.offset)) {
 494          exec_node_insert_node_before(&var->node, &new_var->node);
 495          return;
 496       }
 497    }
 498    exec_list_push_tail(var_list, &new_var->node);
 499 }
 500
 501 void sort_uniforms(nir_shader *shader)
 502 {
 503    struct exec_list new_list;
 504    exec_list_make_empty(&new_list);
 505
 506    nir_foreach_uniform_variable_safe(var, shader) {
 507       exec_node_remove(&var->node);
 508       insert_uniform_sorted(&new_list, var);
 509    }
 510    exec_list_append(&shader->variables, &new_list);
 511 }
 512
 513 }
 514
 515 static nir_intrinsic_op
 516 r600_map_atomic(nir_intrinsic_op op)
 517 {
 518    switch (op) {
 519    case nir_intrinsic_atomic_counter_read_deref:
 520       return nir_intrinsic_atomic_counter_read;
 521    case nir_intrinsic_atomic_counter_inc_deref:
 522       return nir_intrinsic_atomic_counter_inc;
 523    case nir_intrinsic_atomic_counter_pre_dec_deref:
 524       return nir_intrinsic_atomic_counter_pre_dec;
 525    case nir_intrinsic_atomic_counter_post_dec_deref:
 526       return nir_intrinsic_atomic_counter_post_dec;
 527    case nir_intrinsic_atomic_counter_add_deref:
 528       return nir_intrinsic_atomic_counter_add;
 529    case nir_intrinsic_atomic_counter_min_deref:
 530       return nir_intrinsic_atomic_counter_min;
 531    case nir_intrinsic_atomic_counter_max_deref:
 532       return nir_intrinsic_atomic_counter_max;
 533    case nir_intrinsic_atomic_counter_and_deref:
 534       return nir_intrinsic_atomic_counter_and;
 535    case nir_intrinsic_atomic_counter_or_deref:
 536       return nir_intrinsic_atomic_counter_or;
 537    case nir_intrinsic_atomic_counter_xor_deref:
 538       return nir_intrinsic_atomic_counter_xor;
 539    case nir_intrinsic_atomic_counter_exchange_deref:
 540       return nir_intrinsic_atomic_counter_exchange;
 541    case nir_intrinsic_atomic_counter_comp_swap_deref:
 542       return nir_intrinsic_atomic_counter_comp_swap;
 543    default:
 544       return nir_num_intrinsics;
 545    }
 546 }
 547
 548 static bool
 549 r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
 550                        nir_shader *shader)
 551 {
 552    nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
 553    if (nir_num_intrinsics == op)
 554       return false;
 555
 556    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
 557    nir_variable *var = nir_deref_instr_get_variable(deref);
 558
 559    if (var->data.mode != nir_var_uniform &&
 560        var->data.mode != nir_var_mem_ssbo &&
 561        var->data.mode != nir_var_mem_shared)
 562       return false; /* atomics passed as function arguments can't be lowered */
 563
 564    const unsigned idx = var->data.binding;
 565
 566    b->cursor = nir_before_instr(&instr->instr);
 567
 568    nir_ssa_def *offset = nir_imm_int(b, var->data.index);
 569    for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
 570         d = nir_deref_instr_parent(d)) {
 571       assert(d->deref_type == nir_deref_type_array);
 572       assert(d->arr.index.is_ssa);
 573
 574       unsigned array_stride = 1;
 575       if (glsl_type_is_array(d->type))
 576          array_stride *= glsl_get_aoa_size(d->type);
 577
 578       offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
 579                                             nir_imm_int(b, array_stride)));
 580    }
 581
 582    /* Since the first source is a deref and the first source in the lowered
 583     * instruction is the offset, we can just swap it out and change the
 584     * opcode.
 585     */
 586    instr->intrinsic = op;
 587    nir_instr_rewrite_src(&instr->instr, &instr->src[0],
 588                          nir_src_for_ssa(offset));
 589    nir_intrinsic_set_base(instr, idx);
 590
 591    nir_deref_instr_remove_if_unused(deref);
 592
 593    return true;
 594 }
 595
 596 static bool
 597 r600_nir_lower_atomics(nir_shader *shader)
 598 {
 599    bool progress = false;
 600
 601    /* First re-do the offsets, in Hardware we start at zero for each new
 602     * binding, and we use an offset of one per counter */
 603    int current_binding = -1;
 604    int current_offset = 0;
 605    nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
 606       if (!var->type->contains_atomic())
 607          continue;
 608
 609       if (current_binding == (int)var->data.binding) {
 610          var->data.index = current_offset;
 611          current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
 612       } else {
 613          current_binding = var->data.binding;
 614          var->data.index = 0;
 615          current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
 616       }
 617    }
 618
 619    nir_foreach_function(function, shader) {
 620       if (!function->impl)
 621          continue;
 622
 623       bool impl_progress = false;
 624
 625       nir_builder build;
 626       nir_builder_init(&build, function->impl);
 627
 628       nir_foreach_block(block, function->impl) {
 629          nir_foreach_instr_safe(instr, block) {
 630             if (instr->type != nir_instr_type_intrinsic)
 631                continue;
 632
 633             impl_progress |= r600_lower_deref_instr(&build,
 634                                                     nir_instr_as_intrinsic(instr), shader);
 635          }
 636       }
 637
 638       if (impl_progress) {
 639          nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
 640          progress = true;
 641       }
 642    }
 643
 644    return progress;
 645 }
 646 using r600::r600_nir_lower_int_tg4;
 647 using r600::r600_nir_lower_pack_unpack_2x16;
 648 using r600::r600_lower_scratch_addresses;
 649 using r600::r600_lower_fs_out_to_vector;
 650 using r600::r600_lower_ubo_to_align16;
 651
 652 int
 653 r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
 654 {
 655    return glsl_count_vec4_slots(type, false, is_bindless);
 656 }
 657
 658 void
 659 r600_get_natural_size_align_bytes(const struct glsl_type *type,
 660                                   unsigned *size, unsigned *align)
 661 {
 662    if (type->base_type != GLSL_TYPE_ARRAY) {
 663       *align = 1;
 664       *size = 1;
 665    } else {
 666       unsigned elem_size, elem_align;
 667       glsl_get_natural_size_align_bytes(type->fields.array,
 668                                         &elem_size, &elem_align);
 669       *align = 1;
 670       *size = type->length;
 671    }
 672 }
 673
 674 static bool
 675 r600_lower_shared_io_impl(nir_function *func)
 676 {
 677    nir_builder b;
 678    nir_builder_init(&b, func->impl);
 679
 680    bool progress = false;
 681    nir_foreach_block(block, func->impl) {
 682       nir_foreach_instr_safe(instr, block) {
 683
 684          if (instr->type != nir_instr_type_intrinsic)
 685             continue;
 686
 687          nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
 688          if (op->intrinsic != nir_intrinsic_load_shared &&
 689              op->intrinsic != nir_intrinsic_store_shared)
 690             continue;
 691
 692          b.cursor = nir_before_instr(instr);
 693
 694          if (op->intrinsic == nir_intrinsic_load_shared) {
 695             nir_ssa_def *addr = op->src[0].ssa;
 696
 697             switch (nir_dest_num_components(op->dest)) {
 698             case 2: {
 699                auto addr2 = nir_iadd_imm(&b, addr, 4);
 700                addr = nir_vec2(&b, addr, addr2);
 701                break;
 702             }
 703             case 3: {
 704                auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
 705                addr = nir_vec3(&b, addr,
 706                                nir_channel(&b, addr2, 0),
 707                                nir_channel(&b, addr2, 1));
 708                break;
 709             }
 710             case 4: {
 711                addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
 712                break;
 713             }
 714             }
 715
 716             auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
 717             load->num_components = nir_dest_num_components(op->dest);
 718             load->src[0] = nir_src_for_ssa(addr);
 719             nir_ssa_dest_init(&load->instr, &load->dest,
 720                               load->num_components, 32, NULL);
 721             nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
 722             nir_builder_instr_insert(&b, &load->instr);
 723          } else {
 724             nir_ssa_def *addr = op->src[1].ssa;
 725             for (int i = 0; i < 2; ++i) {
 726                unsigned test_mask = (0x3 << 2 * i);
 727                if (!(nir_intrinsic_write_mask(op) & test_mask))
 728                   continue;
 729
 730                auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
 731                unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
 732                nir_intrinsic_set_write_mask(store, writemask);
 733                store->src[0] = nir_src_for_ssa(op->src[0].ssa);
 734                store->num_components = store->src[0].ssa->num_components;
 735                bool start_even = (writemask & (1u << (2 * i)));
 736
 737                auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
 738                store->src[1] = nir_src_for_ssa(addr2);
 739
 740                nir_builder_instr_insert(&b, &store->instr);
 741             }
 742          }
 743          nir_instr_remove(instr);
 744          progress = true;
 745       }
 746    }
 747    return progress;
 748 }
 749
 750 static bool
 751 r600_lower_shared_io(nir_shader *nir)
 752 {
 753         bool progress=false;
 754         nir_foreach_function(function, nir) {
 755                 if (function->impl &&
 756                          r600_lower_shared_io_impl(function))
 757                         progress = true;
 758         }
 759         return progress;
 760 }
 761
 762 static bool
 763 optimize_once(nir_shader *shader)
 764 {
 765    bool progress = false;
 766    NIR_PASS(progress, shader, nir_copy_prop);
 767    NIR_PASS(progress, shader, nir_opt_dce);
 768    NIR_PASS(progress, shader, nir_opt_algebraic);
 769    NIR_PASS(progress, shader, nir_opt_constant_folding);
 770    NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
 771    NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
 772
 773    NIR_PASS(progress, shader, nir_opt_remove_phis);
 774
 775    if (nir_opt_trivial_continues(shader)) {
 776            progress = true;
 777            NIR_PASS(progress, shader, nir_copy_prop);
 778            NIR_PASS(progress, shader, nir_opt_dce);
 779    }
 780
 781    NIR_PASS(progress, shader, nir_opt_if, false);
 782    NIR_PASS(progress, shader, nir_opt_dead_cf);
 783    NIR_PASS(progress, shader, nir_opt_cse);
 784    NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);
 785
 786    NIR_PASS(progress, shader, nir_opt_conditional_discard);
 787    NIR_PASS(progress, shader, nir_opt_dce);
 788    NIR_PASS(progress, shader, nir_opt_undef);
 789    return progress;
 790 }
 791
 792 bool has_saturate(const nir_function *func)
 793 {
 794    nir_foreach_block(block, func->impl) {
 795       nir_foreach_instr(instr, block) {
 796          if (instr->type == nir_instr_type_alu) {
 797             auto alu = nir_instr_as_alu(instr);
 798             if (alu->dest.saturate)
 799                return true;
 800          }
 801       }
 802    }
 803    return false;
 804 }
 805
 806 int r600_shader_from_nir(struct r600_context *rctx,
 807                          struct r600_pipe_shader *pipeshader,
 808                          r600_shader_key *key)
 809 {
 810    char filename[4000];
 811    struct r600_pipe_shader_selector *sel = pipeshader->selector;
 812
 813    r600::ShaderFromNir convert;
 814
 815    if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
 816       fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
 817       nir_print_shader(sel->nir, stderr);
 818       fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
 819    }
 820
 821    r600::sort_uniforms(sel->nir);
 822
 823    NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
 824    NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
 825    NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
 826
 827    NIR_PASS_V(sel->nir, r600_lower_shared_io);
 828    NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
 829
 830    static const struct nir_lower_tex_options lower_tex_options = {
 831       .lower_txp = ~0u,
 832    };
 833    NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
 834    NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
 835
 836    NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
 837    NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
 838
 839    NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
 840               nir_lower_io_lower_64bit_to_32);
 841
 842    if (sel->nir->info.stage == MESA_SHADER_VERTEX)
 843       NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
 844
 845    if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
 846       NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
 847
 848    if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
 849        (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
 850       NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
 851                  nir_lower_io_lower_64bit_to_32);
 852       NIR_PASS_V(sel->nir, r600_lower_tess_io, (pipe_prim_type)key->tcs.prim_mode);
 853    }
 854
 855    if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
 856        sel->nir->info.stage == MESA_SHADER_TESS_EVAL) {
 857       NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
 858                  nir_lower_io_lower_64bit_to_32);
 859    }
 860
 861    if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
 862        sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
 863        (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
 864       auto prim_type = sel->nir->info.stage == MESA_SHADER_TESS_CTRL ?
 865                           key->tcs.prim_mode : sel->nir->info.tess.primitive_mode;
 866       NIR_PASS_V(sel->nir, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
 867    }
 868
 869
 870    if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
 871       NIR_PASS_V(sel->nir, r600_append_tcs_TF_emission,
 872                  (pipe_prim_type)key->tcs.prim_mode);
 873
 874
 875    const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
 876    bool optimize = func->impl->registers.length() == 0 && !has_saturate(func);
 877
 878    if (optimize) {
 879       optimize_once(sel->nir);
 880       NIR_PASS_V(sel->nir, r600_lower_ubo_to_align16);
 881    }
 882    /* It seems the output of this optimization is cached somewhere, and
 883     * when there are registers, then we can no longer copy propagate, so
 884     * skip the optimization then. (There is probably a better way, but yeah)
 885     */
 886    if (optimize)
 887       while(optimize_once(sel->nir));
 888
 889    NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_in, NULL);
 890    NIR_PASS_V(sel->nir, nir_remove_dead_variables,  nir_var_shader_out, NULL);
 891
 892
 893    NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
 894               nir_var_function_temp,
 895               40,
 896               r600_get_natural_size_align_bytes);
 897
 898    while (optimize && optimize_once(sel->nir));
 899
 900    NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
 901    //NIR_PASS_V(sel->nir, nir_opt_algebraic);
 902    //NIR_PASS_V(sel->nir, nir_copy_prop);
 903    NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods);
 904    NIR_PASS_V(sel->nir, nir_convert_from_ssa, true);
 905    NIR_PASS_V(sel->nir, nir_opt_dce);
 906
 907    if ((rctx->screen->b.debug_flags & DBG_NIR) &&
 908        (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
 909       fprintf(stderr, "-- NIR --------------------------------------------------------\n");
 910       struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions);
 911       nir_index_ssa_defs(func->impl);
 912       nir_print_shader(sel->nir, stderr);
 913       fprintf(stderr, "-- END --------------------------------------------------------\n");
 914    }
 915
 916    memset(&pipeshader->shader, 0, sizeof(r600_shader));
 917    pipeshader->scratch_space_needed = sel->nir->scratch_size;
 918
 919    if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
 920        sel->nir->info.stage == MESA_SHADER_VERTEX ||
 921        sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
 922       pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
 923       pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
 924                                            << sel->nir->info.clip_distance_array_size;
 925       pipeshader->shader.cc_dist_mask = (1 <<  (sel->nir->info.cull_distance_array_size +
 926                                                 sel->nir->info.clip_distance_array_size)) - 1;
 927    }
 928
 929    struct r600_shader* gs_shader = nullptr;
 930    if (rctx->gs_shader)
 931       gs_shader = &rctx->gs_shader->current->shader;
 932    r600_screen *rscreen = rctx->screen;
 933
 934    bool r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
 935    if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
 936       static int shnr = 0;
 937
 938       snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);
 939
 940       if (access(filename, F_OK) == -1) {
 941          FILE *f = fopen(filename, "w");
 942
 943          if (f) {
 944             fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name);
 945             nir_print_shader(sel->nir, f);
 946             fprintf(f, ")\";\n");
 947             fclose(f);
 948          }
 949       }
 950       if (!r)
 951          return -2;
 952    }
 953
 954    auto shader = convert.shader();
 955
 956    r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
 957                       rscreen->has_compressed_msaa_texturing);
 958
 959    r600::sfn_log << r600::SfnLog::shader_info
 960                  << "pipeshader->shader.processor_type = "
 961                  << pipeshader->shader.processor_type << "\n";
 962
 963    pipeshader->shader.bc.type = pipeshader->shader.processor_type;
 964    pipeshader->shader.bc.isa = rctx->isa;
 965
 966    r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
 967    if (!afs.lower(shader.m_ir)) {
 968       R600_ERR("%s: Lowering to assembly failed\n", __func__);
 969       return -1;
 970    }
 971
 972    if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
 973       r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
 974       generate_gs_copy_shader(rctx, pipeshader, &sel->so);
 975       assert(pipeshader->gs_copy_shader);
 976    } else {
 977       r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
 978    }
 979    if (pipeshader->shader.bc.ngpr < 4)
 980       pipeshader->shader.bc.ngpr = 4;
 981
 982    return 0;
 983 }