r600/sfn: Add support for SSBO load and store
[mesa.git] / src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size):
   m_processor_type(ptype),
   m_sh_info(sh_info),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

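/* Update the GPR indices recorded in the shader-info I/O tables after
 * register renumbering: map entries flagged as valid carry the new
 * register index, and the registers referenced by inputs and outputs
 * are marked as used so they stay pinned in later passes. */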
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

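/* Renumber the temporary registers based on their live ranges to lower
 * the shader's GPR demand. The remapping is applied twice: a first pass
 * merges registers with disjoint live ranges, then, after the unused
 * entries have been compacted, a second pass assigns the final
 * consecutive indices. */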
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";


   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto ir: m_output)
      ir->remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto ir: m_output)
      ir->remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

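/* Record the type of each uniform and account for hardware atomic
 * counters: every atomic in the uniform claims a slot in a contiguous
 * hw-atomic range, and images and SSBOs are flagged so the backend sets
 * up the corresponding resources. */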
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);


}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

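/* Compute the SPI semantic id the hardware uses to match outputs of one
 * stage with inputs of the next: system values with dedicated hardware
 * channels get 0, generic varyings use their semantic index, and all
 * other semantics pack name and index into eight bits. */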
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %u\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      m_output.push_back(PInstruction(m_pending_else));
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_output.push_back(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

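/* Open a conditional block: the NIR condition is evaluated with an
 * integer compare against zero that updates both the execution mask and
 * the predicate, and the ALU clause is flagged to push the branch state
 * onto the hardware stack before the condition takes effect. */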
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

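/* Dispatch NIR intrinsics. Derived shader classes get the first chance
 * via the override hook; SSBO loads and stores are forwarded to the
 * SSBO emitter, and every intrinsic that writes memory is flagged in
 * the selector so the state tracker knows about the side effects. */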
bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

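/* Spill a value to the scratch buffer. If the address is a literal the
 * write can use an immediate offset; otherwise the address is first
 * forced into a GPR and an indirect scratch write is emitted. */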
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                  swizzle_from_mask(instr->num_components)));
   GPRVector value(*vec);

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = dynamic_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

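/* Obtain a four-component GPR vector for a NIR source. If the source
 * already lives in a GPR whose first component is channel x, that
 * register can be reused directly; otherwise the masked components are
 * copied into a freshly allocated temporary with the requested swizzle. */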
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                    UNUSED unsigned mask,
                                                                    const GPRVector::Swizzle& swizzle)
{
   GPRVector *result = nullptr;
   int sel = lookup_register_index(src);
   if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
       from_nir(src, 0)->chan() == 0) {
      /* If the x channel really is channel x of a GPR, we can be reasonably
       * sure that the values are laid out as we need them */
      result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
   } else {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      GPRVector::Values v;
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);

      result = new GPRVector(v);
   }
   return result;
}

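/* Load from a uniform buffer. When both the buffer id and the offset
 * are literal constants, the components can be bound lazily as constant
 * reads (the 512 base puts the address into the range the backend
 * reserves for buffer constants); with a run-time buffer id a
 * vertex-cache fetch with the buffer index taken from a register is
 * emitted instead. */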
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                       {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

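/* Load a uniform through a vertex-cache fetch when the address is only
 * known at run time. The address is copied into a GPR first if it is
 * not in one already, since the fetch takes its source from a GPR. */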
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

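/* Materialize literal constants. Values matching one of the inline
 * constants (0, 1, 1.0f, 0.5f) are emitted as such, everything else
 * becomes an ALU literal; bit sizes other than 1 and 32 fall back to a
 * 32-bit literal. */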
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue( 0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

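/* Return the value of a NIR source, copying it into a temporary GPR if
 * it does not already live in one, so it can be used where a GPR
 * operand is required (e.g. as a fetch address). */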
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      unsigned temp = allocate_temp_register();
      PValue retval(new GPRValue(temp, component));
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need a specialized deref_array; for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "  as '" << *ir << "'\n";
   m_export_output.push_back(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << *var << "\n";
   m_outputs[pos] = var;
}

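/* Final pass after all instructions have been emitted: let the derived
 * shader class finish its bookkeeping, propagate the GPRs that the
 * inputs and outputs ended up in into the shader info, and append the
 * queued export instructions to the instruction stream. */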
void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second->sel();

   m_output.insert(m_output.end(), m_export_output.begin(), m_export_output.end());
   m_export_output.clear();
}

}