/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base),
   m_image_count(0)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

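/* Pre-pass over the NIR instructions: records properties that must be known
 * before code emission starts, e.g. whether texture buffers or the z component
 * of a cube array query are used, and whether the shader writes memory.
 * System-value handling is delegated to the shader-type specific
 * scan_sysvalue_access(). */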
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   m_ssbo_instr.load_rat_return_address();
   if (sh_info().uses_atomics)
      m_ssbo_instr.load_atomic_inc_limits();
   m_ssbo_instr.set_ssbo_offset(m_image_count);
   return retval;
}

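/* Apply the register renaming map to the GPR indices recorded in the
 * r600_shader inputs and outputs, and mark the (possibly renamed) registers
 * as used so they survive the subsequent compaction pass. */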
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

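/* Renumber the temporary registers in two passes: first merge registers with
 * disjoint live ranges based on the live-range evaluation, then compact the
 * remaining used registers into a dense index range. */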
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

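/* Record the uniform type and account for hardware atomic counters and
 * images; atomic counter ranges are mapped to consecutive hardware slots
 * starting at m_atomic_base. */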
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   if (uniform->type->is_image()) {
      ++m_image_count;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

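/* Compute the SPI semantic id that is used to match exports with the inputs
 * of the next stage; system values that are not passed through the SPI get
 * sid 0. A worked example for the default case, assuming
 * TGSI_SEMANTIC_COLOR == 1 and sid == 0:
 * spi_sid = (0x80 | (1 << 3) | 0) + 1 = 0x89. */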
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

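/* Map NIR shared-memory atomics to the corresponding r600 LDS opcodes; the
 * *_RET variants return the value that was stored at the address before the
 * operation. */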
static unsigned
lds_op_from_intrinsic(nir_intrinsic_op op) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return LDS_OP2_LDS_ADD_RET;
   case nir_intrinsic_shared_atomic_and:
      return LDS_OP2_LDS_AND_RET;
   case nir_intrinsic_shared_atomic_or:
      return LDS_OP2_LDS_OR_RET;
   case nir_intrinsic_shared_atomic_imax:
      return LDS_OP2_LDS_MAX_INT_RET;
   case nir_intrinsic_shared_atomic_umax:
      return LDS_OP2_LDS_MAX_UINT_RET;
   case nir_intrinsic_shared_atomic_imin:
      return LDS_OP2_LDS_MIN_INT_RET;
   case nir_intrinsic_shared_atomic_umin:
      return LDS_OP2_LDS_MIN_UINT_RET;
   case nir_intrinsic_shared_atomic_xor:
      return LDS_OP2_LDS_XOR_RET;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_OP2_LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_OP3_LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   auto address = from_nir(instr->src[0], 0);
   auto dest_value = from_nir(instr->dest, 0);
   auto value = from_nir(instr->src[1], 0);
   auto op = lds_op_from_intrinsic(instr->intrinsic);

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
      auto value2 = from_nir(instr->src[2], 0);
      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
   } else {
      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
   }
   return true;
}

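/* An LDS write handles at most two dwords. If only the upper half of the
 * writemask is set, source components are read starting at channel 2;
 * folding the mask with "write_mask |= write_mask >> 2" then tells whether
 * a second value has to be written. */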
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier:
      return emit_barrier(instr);
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

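/* For non-SSA destinations emit a real MOV; for SSA destinations it is
 * enough to inject the preloaded value as the register backing the given
 * channel. */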
bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

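/* Gather the masked source components into one GPR vector. If all components
 * already live in the same GPR (and, with 'match', in the requested
 * channels), that register is reused directly; otherwise the components are
 * copied into a freshly allocated temporary. */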
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

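/* Three cases for UBO loads: buffer id and offset both literal - the uniform
 * can be accessed lazily; only the buffer id is literal - fall back to an
 * indirect uniform load from that buffer; fully dynamic - emit a
 * vertex-cache fetch with the buffer id taken from a register. */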
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;
      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                          {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

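/* Indirectly addressed uniforms are loaded with a vertex-cache fetch; the
 * fetch address must live in a GPR, so non-GPR addresses are first copied
 * into the first target register. */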
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

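/* Lower a NIR constant to MOVs. Values that the hardware provides as inline
 * constants (0, 1, 1.0f, 0.5f) are used directly; everything else becomes a
 * literal ALU operand. */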
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

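/* Like from_nir(), but guarantees that the result lives in a GPR;
 * fetch-style instructions cannot read literals or uniforms directly, so
 * such values are copied into a temporary register first. */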
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

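/* Start a new instruction block, adjusting the nesting depth by the given
 * delta (+1 when entering a loop or branch, -1 when leaving). */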
void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}