r600/sfn: Add support for reading cube image array dim.
[mesa.git] src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {               \
      X;              \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

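/* Pre-scan the NIR instructions to collect shader info flags that must be
 * known up front: texture buffer use, the TXQ/image-size cube array
 * z-component lowering, and whether atomics write memory. */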
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   return retval;
}

static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

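/* Remap the temporary registers in two passes: first apply the mapping
 * computed from the live-range analysis, then mark input registers as
 * used and compact all surviving registers into a dense index range. */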
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

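/* Atomic counter uniforms get a range of consecutive hardware counter
 * slots, starting at m_atomic_base and advancing m_next_hwatomic_loc by
 * the number of counters the uniform covers. */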
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

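/* Compute the SPI semantic ID used to match the outputs of one stage with
 * the inputs of the next: system values get 0, generic params use sid + 1,
 * and everything else packs name and sid into the value. */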
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

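/* A pending ELSE is only written out together with the next emitted
 * instruction; this way an else branch that turns out empty produces no
 * code at all (emit_ifelse_end simply drops a still-pending else). */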
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

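/* The if condition is evaluated as a predicate: PRED_SETNE_INT compares it
 * with zero, updating the execution mask and pushing the predicate state
 * before the ALU clause. */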
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

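/* An LDS write stores at most two values: fold the zw half of the write
 * mask onto xy, start reading at swizzle 2 if only the upper components
 * are written, and emit the write with one or two values accordingly. */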
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
      return emit_barrier(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

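/* Gather the masked source components into one GPR vector. If all values
 * already live in the same GPR (with matching channels when 'match' is
 * set), that register is reused directly; otherwise everything is copied
 * into a newly allocated temporary. */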
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

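/* UBO loads take one of three paths: with buffer id and offset both
 * literal the components become lazily resolved uniform values; a literal
 * buffer id with an indirect offset goes through load_uniform_indirect;
 * a non-constant buffer id needs a full vertex-cache fetch. */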
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

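/* The common constants (0, 1, 1.0f, 0.5f) map to the ALU inline constant
 * sources instead of occupying literal slots; everything else becomes a
 * literal value. */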
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

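/* Fetch-style consumers need their source in a GPR, so copy the value
 * into a temporary register unless it already is one. */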
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}