r600/sfn: Make 3vec loads skip possible moves
[mesa.git] src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   return retval;
}

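/* Apply a register renumbering map to the shader I/O info, so that the GPR
 * indices recorded for driver-visible inputs and outputs stay in sync with
 * the renamed temporaries. */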
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

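/* Renumber the temporary registers in two passes: first apply the remapping
 * computed from the live-range analysis, then mark the registers backing
 * shader inputs as used and compact everything still in use into a dense
 * index range, applying that second map to the IR and I/O info as well. */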
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

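/* Record the uniform type and, for atomic counter uniforms, reserve a
 * hardware atomic range: consecutive counters in one binding are packed
 * into a single r600_shader_atomic entry covering [start, end]. */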
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s)

}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

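/* Compute the SPI semantic ID that matches outputs of one stage with inputs
 * of the next: system values get 0, generic/texcoord params are identified
 * by their sid, and everything else packs name and sid into eight bits.
 * This mirrors what r600_spi_sid() does on the TGSI path. */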
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

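/* Emit one IR instruction into the current block. A pending ELSE is flushed
 * first: nesting is decreased for the ELSE itself and increased again for
 * its body, so an entirely empty else branch never gets emitted. */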
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

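/* Open an IF block: the condition is compared against zero with
 * PRED_SETNE_INT, updating the execution mask and the predicate, and the
 * 'push before' CF type saves the current state on the stack so the
 * matching POP can restore it. */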
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }

   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

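/* An LDS write instruction carries at most two values. The write mask is
 * folded into its lower two bits, so a store to .zw becomes a two-component
 * write starting at swizzle base 2, and bit 1 decides whether a second
 * value is appended. */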
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

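/* Dispatch NIR intrinsics: shader-type specific intrinsics get a chance in
 * the override hook first; what remains here is common to all stages. */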
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
      return emit_barrier(instr);

   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}


bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

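/* Scratch (private) memory access: a literal address can be encoded in the
 * write instruction directly, anything else has to live in a GPR. Either
 * way the shader is flagged as needing scratch space. */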
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

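/* Gather up to four source components into a GPRVector. If all requested
 * components already live in one and the same GPR (and, with 'match' set,
 * in exactly the requested channels), that register is reused directly and
 * no moves are emitted; only otherwise are the components copied into a
 * freshly allocated temporary. This is what lets e.g. three-component
 * loads skip otherwise needless moves. */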
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            if (v[i]->type() != Value::gpr)
               use_same = false;
            if (match && (v[i]->chan() != swizzle[i]))
               use_same = false;
         }
      }
   }

   if (use_same) {
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);

      unsigned sel = v[i]->sel();
      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i])
            v[i] = PValue(new GPRValue(sel, swizzle[i]));
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   if (!use_same) {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   }
   return GPRVector(v);
}

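/* Load from an r600 UBO: with literal buffer id and offset the components
 * are registered as lazily-loaded uniform values; with a literal buffer id
 * but variable offset the load takes the indirect uniform path; a variable
 * buffer id requires a real vertex-cache fetch with the id in a register. */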
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect, then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

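/* Materialize NIR load_const values: constants the ALU can encode as inline
 * operands (0, 1, 1.0f, 0.5f) are mapped to the corresponding special
 * values, everything else becomes a literal slot in the instruction group. */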
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue(0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
         emit_instruction(ir);
      }
   }
   return ir;
}

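/* Fetch-style instructions can only address GPRs, so anything that is not
 * already GPR-backed is first copied into a temporary register. */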
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}


bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}