r600/sfn: Implementing instruction blocks
src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do { \
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

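/* Pre-scan a NIR instruction before any code is emitted, so that global
 * shader info (like the use of texture buffers) can be recorded up front;
 * system value access is handled by the shader-type specific override. */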
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

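/* After register renumbering, rewrite the GPR indices recorded in the
 * r600_shader input and output tables and mark the resulting registers as
 * used so that a later compaction pass will keep them. */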
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

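/* Register remapping runs in two rounds: first, registers with
 * non-overlapping live ranges are merged, then, with the input registers
 * pinned as used, the surviving registers are renumbered densely from zero.
 * Each round rewrites both the emitted instruction blocks and the shader
 * info tables. */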
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers; these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

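/* Record the uniform type; atomic counter uniforms additionally claim a
 * contiguous range of hardware atomic slots, with consecutive uniforms
 * packed back to back via m_next_hwatomic_loc. */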
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);

}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

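/* Evaluate the SPI semantic index used to match the outputs of one shader
 * stage with the inputs of the next: fixed-function semantics get 0,
 * generic varyings use sid + 1, and all other names are packed into an
 * eight-bit value. */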
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

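/* Central emission point: every instruction is appended to the current
 * InstructionBlock. A pending ELSE is flushed here first, wrapped in its
 * own block one nesting level up, so that an empty else branch never emits
 * anything. */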
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

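/* LOOP_BEGIN opens a new, deeper block; the begin instruction is remembered
 * per loop id so that the matching LOOP_END emitted later can refer back to
 * it. */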
bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

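/* An IF is emitted as an ALU predicate operation (pred_setne_int against
 * zero) that updates the execution mask and pushes the branch state before
 * the jump; the conditionally executed code then lives in a new, deeper
 * block. */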
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

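/* Dispatch NIR intrinsics: shader-type specific handlers get the first
 * chance via the override hook, the generic cases are handled below. */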
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

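/* Scratch (private) memory access: with a literal address the direct-offset
 * form of the write instruction is used, otherwise the address is fetched
 * into a GPR and the indirect form bounded by m_scratch_size is used. */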
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                  swizzle_from_mask(instr->num_components)));
   GPRVector value(*vec);

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

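/* Build a four-component vector from a NIR source. If the source already
 * starts at an x-channel GPR it can be used as-is with the requested
 * swizzle; otherwise the masked components are copied into a freshly
 * allocated temporary register. */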
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                    UNUSED unsigned mask,
                                                                    const GPRVector::Swizzle& swizzle)
{
   GPRVector *result = nullptr;
   int sel = lookup_register_index(src);
   if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
       from_nir(src, 0)->chan() == 0) {
      /* If the x-channel is really an x-channel register then we are pretty
       * safe that the values come as we need them */
      result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
   } else {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      GPRVector::Values v;
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);

      result = new GPRVector(v);
   }
   return result;
}

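/* UBO loads fall into three cases: buffer id and offset both literal (the
 * uniform values are registered lazily per component), literal buffer id
 * with an indirect offset (indirect uniform load), and a fully indirect
 * access that needs a vertex fetch with the buffer id taken from a
 * register. */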
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

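/* Load a NIR literal, mapping the common constants 0, 1 (int and float)
 * and 0.5 to the hardware's inline constants instead of emitting literal
 * values. */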
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)){
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue( 0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      unsigned temp = allocate_temp_register();
      PValue retval(new GPRValue(temp, component));
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need a specialized deref_array, for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var << "\n";
   m_outputs[pos] = var;
}

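/* Open a new instruction block at the adjusted nesting depth; blocks are
 * numbered in emission order. */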
void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second->sel();

   m_output.push_back(m_export_output);
}

}