src/gallium/drivers/r600/sfn/sfn_shader_base.cpp

   1 /* -*- mesa-c++  -*-
   2  *
   3  * Copyright (c) 2018 Collabora LTD
   4  *
   5  * Author: Gert Wollny <gert.wollny@collabora.com>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * on the rights to use, copy, modify, merge, publish, distribute, sub
  11  * license, and/or sell copies of the Software, and to permit persons to whom
  12  * the Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27 #include "../r600_pipe.h"
  28 #include "../r600_shader.h"
  29 #include "sfn_shader_vertex.h"
  30
  31 #include "sfn_shader_compute.h"
  32 #include "sfn_shader_fragment.h"
  33 #include "sfn_shader_geometry.h"
  34 #include "sfn_liverange.h"
  35 #include "sfn_ir_to_assembly.h"
  36 #include "sfn_nir.h"
  37 #include "sfn_instruction_misc.h"
  38 #include "sfn_instruction_fetch.h"
  39 #include "sfn_instruction_lds.h"
  40
  41 #include <iostream>
  42
  43 #define ENABLE_DEBUG 1
  44
  45 #ifdef ENABLE_DEBUG
  46 #define DEBUG_SFN(X)  \
  47    do {\
  48       X; \
  49    } while (0)
  50 #else
  51 #define DEBUG_SFN(X)
  52 #endif
  53
  54 namespace r600 {
  55
  56 using namespace std;
  57
  58
  59 ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
  60                                                r600_pipe_shader_selector& sel,
  61                                                r600_shader &sh_info, int scratch_size,
  62                                                enum chip_class chip_class):
  63    m_processor_type(ptype),
  64    m_nesting_depth(0),
  65    m_block_number(0),
  66    m_export_output(0, -1),
  67    m_sh_info(sh_info),
  68    m_chip_class(chip_class),
  69    m_tex_instr(*this),
  70    m_alu_instr(*this),
  71    m_ssbo_instr(*this),
  72    m_pending_else(nullptr),
  73    m_scratch_size(scratch_size),
  74    m_next_hwatomic_loc(0),
  75    m_sel(sel)
  76 {
  77    m_sh_info.processor_type = ptype;
  78 }
  79
  80
  81 ShaderFromNirProcessor::~ShaderFromNirProcessor()
  82 {
  83 }
  84
  85 bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
  86 {
  87    switch (instr->type) {
  88    case nir_instr_type_tex: {
  89       nir_tex_instr *t = nir_instr_as_tex(instr);
  90       if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
  91          sh_info().uses_tex_buffers = true;
  92    }
  93    default:
  94       ;
  95    }
  96
  97    return scan_sysvalue_access(instr);
  98 }
  99
 100 enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
 101 {
 102   return m_chip_class;
 103 }
 104
 105 static void remap_shader_info(r600_shader& sh_info,
 106                               std::vector<rename_reg_pair>& map,
 107                               UNUSED ValueMap& values)
 108 {
 109    for (unsigned i = 0; i < sh_info.ninput; ++i) {
 110       sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
 111               << " of map.size()\n";
 112
 113       assert(sh_info.input[i].gpr < map.size());
 114       auto new_index = map[sh_info.input[i].gpr];
 115       if (new_index.valid)
 116          sh_info.input[i].gpr = new_index.new_reg;
 117       map[sh_info.input[i].gpr].used = true;
 118    }
 119
 120    for (unsigned i = 0; i < sh_info.noutput; ++i) {
 121       assert(sh_info.output[i].gpr < map.size());
 122       auto new_index = map[sh_info.output[i].gpr];
 123       if (new_index.valid)
 124          sh_info.output[i].gpr = new_index.new_reg;
 125       map[sh_info.output[i].gpr].used = true;
 126    }
 127 }
 128
 129 void ShaderFromNirProcessor::remap_registers()
 130 {
 131    // register renumbering
 132    auto rc = register_count();
 133    if (!rc)
 134       return;
 135
 136    std::vector<register_live_range> register_live_ranges(rc);
 137
 138    auto temp_register_map = get_temp_registers();
 139
 140    Shader sh{m_output, temp_register_map};
 141    LiverangeEvaluator().run(sh, register_live_ranges);
 142    auto register_map = get_temp_registers_remapping(register_live_ranges);
 143
 144    sfn_log << SfnLog::merge << "=========Mapping===========\n";
 145    for (size_t  i = 0; i < register_map.size(); ++i)
 146       if (register_map[i].valid)
 147          sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";
 148
 149    ValueRemapper vmap0(register_map, temp_register_map);
 150    for (auto& block: m_output)
 151       block.remap_registers(vmap0);
 152
 153    remap_shader_info(m_sh_info, register_map, temp_register_map);
 154
 155    /* Mark inputs as used registers, these registers should no be remapped */
 156    for (auto& v: sh.m_temp) {
 157       if (v.second->type() == Value::gpr) {
 158          const auto& g = static_cast<const GPRValue&>(*v.second);
 159          if (g.is_input())
 160             register_map[g.sel()].used = true;
 161       }
 162    }
 163
 164    int new_index = 0;
 165    for (auto& i : register_map) {
 166       i.valid = i.used;
 167       if (i.used)
 168          i.new_reg = new_index++;
 169    }
 170
 171    ValueRemapper vmap1(register_map, temp_register_map);
 172    for (auto& ir: m_output)
 173       ir.remap_registers(vmap1);
 174
 175    remap_shader_info(m_sh_info, register_map, temp_register_map);
 176 }
 177
 178 bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
 179 {
 180    // m_uniform_type_map
 181    m_uniform_type_map[uniform->data.location] = uniform->type;
 182
 183    if (uniform->type->contains_atomic()) {
 184       int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
 185       sh_info().nhwatomic += natomics;
 186
 187       if (uniform->type->is_array())
 188          sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
 189
 190       sh_info().uses_atomics = 1;
 191
 192       struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
 193       ++sh_info().nhwatomic_ranges;
 194       atom.buffer_id = uniform->data.binding;
 195       atom.hw_idx = m_next_hwatomic_loc;
 196       atom.start = m_next_hwatomic_loc;
 197       atom.end = atom.start + natomics - 1;
 198       m_next_hwatomic_loc = atom.end + 1;
 199       //atom.array_id = uniform->type->is_array() ? 1 : 0;
 200
 201       m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end  - atom.start + 1;
 202
 203       sfn_log << SfnLog::io << "HW_ATOMIC file count: "
 204               << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
 205    }
 206
 207    if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
 208       sh_info().uses_images = 1;
 209    }
 210
 211    return true;
 212 }
 213
 214 bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
 215 {
 216    return do_process_inputs(input);
 217 }
 218
 219 bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
 220 {
 221    return do_process_outputs(output);
 222 }
 223
 224 void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
 225 {
 226    nir_variable *var = nir_deref_instr_get_variable(instr);
 227
 228    assert(instr->mode == nir_var_function_temp);
 229    assert(glsl_type_is_array(var->type));
 230
 231    // add an alias for the index to the register(s);
 232
 233
 234 }
 235
 236 void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
 237 {
 238    auto& dest = instr->dest;
 239    unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
 240    m_var_mode[instr->var] = instr->mode;
 241    m_var_derefs[index] = instr->var;
 242
 243    sfn_log << SfnLog::io << "Add var deref:" << index
 244            << " with DDL:" << instr->var->data.driver_location << "\n";
 245 }
 246
 247 void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
 248 {
 249    switch (io.name) {
 250    case TGSI_SEMANTIC_POSITION:
 251    case TGSI_SEMANTIC_PSIZE:
 252    case TGSI_SEMANTIC_EDGEFLAG:
 253    case TGSI_SEMANTIC_FACE:
 254    case TGSI_SEMANTIC_SAMPLEMASK:
 255    case TGSI_SEMANTIC_CLIPVERTEX:
 256       io.spi_sid = 0;
 257       break;
 258    case TGSI_SEMANTIC_GENERIC:
 259    case TGSI_SEMANTIC_TEXCOORD:
 260    case TGSI_SEMANTIC_PCOORD:
 261       io.spi_sid = io.sid + 1;
 262       break;
 263    default:
 264       /* For non-generic params - pack name and sid into 8 bits */
 265       io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
 266    }
 267 }
 268
 269 const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
 270 {
 271    unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
 272
 273    sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
 274
 275    auto v = m_var_derefs.find(index);
 276    if (v != m_var_derefs.end())
 277       return v->second;
 278
 279      fprintf(stderr, "R600: could not find deref with index %d\n", index);
 280
 281      return nullptr;
 282
 283    /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
 284    return  nir_deref_instr_get_variable(deref); */
 285 }
 286
 287 bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
 288 {
 289    return m_tex_instr.emit(instr);
 290 }
 291
 292 void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
 293 {
 294    if (m_pending_else) {
 295       append_block(-1);
 296       m_output.back().emit(PInstruction(m_pending_else));
 297       append_block(1);
 298       m_pending_else = nullptr;
 299    }
 300
 301    r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
 302    if (m_output.empty())
 303       append_block(0);
 304
 305    m_output.back().emit(Instruction::Pointer(ir));
 306 }
 307
 308 void ShaderFromNirProcessor::emit_shader_start()
 309 {
 310    /* placeholder, may become an abstract method */
 311 }
 312
 313 bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
 314 {
 315    switch (instr->type) {
 316    case nir_jump_break: {
 317       auto b = new LoopBreakInstruction();
 318       emit_instruction(b);
 319       return true;
 320    }
 321    case nir_jump_continue: {
 322       auto  b = new LoopContInstruction();
 323       emit_instruction(b);
 324       return true;
 325    }
 326    default: {
 327       nir_instr *i = reinterpret_cast<nir_instr*>(instr);
 328       sfn_log << SfnLog::err << "Jump instrunction " << *i <<  " not supported\n";
 329       return false;
 330    }
 331    }
 332    return true;
 333 }
 334
 335 bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
 336 {
 337    return m_alu_instr.emit(instr);
 338 }
 339
 340 bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
 341 {
 342    return false;
 343 }
 344
 345 bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
 346 {
 347    LoopBeginInstruction *loop = new LoopBeginInstruction();
 348    emit_instruction(loop);
 349    m_loop_begin_block_map[loop_id] = loop;
 350    append_block(1);
 351    return true;
 352 }
 353 bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
 354 {
 355    auto start = m_loop_begin_block_map.find(loop_id);
 356    if (start == m_loop_begin_block_map.end()) {
 357       sfn_log << SfnLog::err  << "End loop: Loop start for "
 358               << loop_id << "  not found\n";
 359       return false;
 360    }
 361    m_nesting_depth--;
 362    m_block_number++;
 363    m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
 364    LoopEndInstruction *loop = new LoopEndInstruction(start->second);
 365    emit_instruction(loop);
 366
 367    m_loop_begin_block_map.erase(start);
 368    return true;
 369 }
 370
 371 bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
 372 {
 373
 374    auto value = from_nir(if_stmt->condition, 0, 0);
 375    AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
 376                                              value, Value::zero, EmitInstruction::last);
 377    pred->set_flag(alu_update_exec);
 378    pred->set_flag(alu_update_pred);
 379    pred->set_cf_type(cf_alu_push_before);
 380
 381    append_block(1);
 382
 383    IfInstruction *ir = new IfInstruction(pred);
 384    emit_instruction(ir);
 385    assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
 386    m_if_block_start_map[if_id] = ir;
 387    return true;
 388 }
 389
 390 bool ShaderFromNirProcessor::emit_else_start(int if_id)
 391 {
 392    auto iif = m_if_block_start_map.find(if_id);
 393    if (iif == m_if_block_start_map.end()) {
 394       std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
 395       return false;
 396    }
 397
 398    if (iif->second->type() != Instruction::cond_if) {
 399       std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
 400       return false;
 401    }
 402    IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
 403    ElseInstruction *ir = new ElseInstruction(if_instr);
 404    m_if_block_start_map[if_id] = ir;
 405    m_pending_else = ir;
 406
 407    return true;
 408 }
 409
 410 bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
 411 {
 412    auto ifelse = m_if_block_start_map.find(if_id);
 413    if (ifelse == m_if_block_start_map.end()) {
 414       std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
 415       return false;
 416    }
 417
 418    if (ifelse->second->type() != Instruction::cond_if &&
 419        ifelse->second->type() != Instruction::cond_else) {
 420       std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
 421       return false;
 422    }
 423    /* Clear pending else, if the else branch was empty, non will be emitted */
 424
 425    m_pending_else = nullptr;
 426
 427    append_block(-1);
 428    IfElseEndInstruction *ir = new IfElseEndInstruction();
 429    emit_instruction(ir);
 430
 431    return true;
 432 }
 433
 434 bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
 435 {
 436    PValue src = get_temp_register();
 437    emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));
 438
 439    GPRVector dest = vec_from_nir(instr->dest, instr->num_components);
 440    emit_instruction(new FetchTCSIOParam(dest, src, offset));
 441
 442    return true;
 443
 444 }
 445
 446 bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
 447 {
 448    auto address = varvec_from_nir(instr->src[0], instr->num_components);
 449    auto dest_value = varvec_from_nir(instr->dest, instr->num_components);
 450
 451    emit_instruction(new LDSReadInstruction(address, dest_value));
 452    return true;
 453 }
 454
 455 bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
 456 {
 457    unsigned write_mask = nir_intrinsic_write_mask(instr);
 458
 459    auto address = from_nir(instr->src[1], 0);
 460    int swizzle_base = (write_mask & 0x3) ? 0 : 2;
 461    write_mask |= write_mask >> 2;
 462
 463    auto value =  from_nir(instr->src[0], swizzle_base);
 464    if (!(write_mask & 2)) {
 465       emit_instruction(new LDSWriteInstruction(address, 0, value));
 466    } else {
 467       auto value1 = from_nir(instr->src[0], swizzle_base + 1);
 468       emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
 469    }
 470
 471    return true;
 472 }
 473
 474 bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
 475 {
 476    r600::sfn_log << SfnLog::instr << "emit '"
 477                  << *reinterpret_cast<nir_instr*>(instr)
 478                  << "' (" << __func__ << ")\n";
 479
 480    if (emit_intrinsic_instruction_override(instr))
 481       return true;
 482
 483    switch (instr->intrinsic) {
 484    case nir_intrinsic_load_deref: {
 485       auto var = get_deref_location(instr->src[0]);
 486       if (!var)
 487          return false;
 488       auto mode_helper = m_var_mode.find(var);
 489       if (mode_helper == m_var_mode.end()) {
 490          cerr << "r600-nir: variable '" << var->name << "' not found\n";
 491          return false;
 492       }
 493       switch (mode_helper->second) {
 494       case nir_var_shader_in:
 495          return emit_load_input_deref(var, instr);
 496       case nir_var_function_temp:
 497          return emit_load_function_temp(var, instr);
 498       default:
 499          cerr << "r600-nir: Unsupported mode" << mode_helper->second
 500               << "for src variable\n";
 501          return false;
 502       }
 503    }
 504    case nir_intrinsic_store_scratch:
 505       return emit_store_scratch(instr);
 506    case nir_intrinsic_load_scratch:
 507       return emit_load_scratch(instr);
 508    case nir_intrinsic_store_deref:
 509       return emit_store_deref(instr);
 510    case nir_intrinsic_load_uniform:
 511       return reserve_uniform(instr);
 512    case nir_intrinsic_discard:
 513    case nir_intrinsic_discard_if:
 514       return emit_discard_if(instr);
 515    case nir_intrinsic_load_ubo_r600:
 516       return emit_load_ubo(instr);
 517    case nir_intrinsic_atomic_counter_add:
 518    case nir_intrinsic_atomic_counter_and:
 519    case nir_intrinsic_atomic_counter_exchange:
 520    case nir_intrinsic_atomic_counter_max:
 521    case nir_intrinsic_atomic_counter_min:
 522    case nir_intrinsic_atomic_counter_or:
 523    case nir_intrinsic_atomic_counter_xor:
 524    case nir_intrinsic_atomic_counter_comp_swap:
 525    case nir_intrinsic_atomic_counter_read:
 526    case nir_intrinsic_atomic_counter_post_dec:
 527    case nir_intrinsic_atomic_counter_inc:
 528    case nir_intrinsic_atomic_counter_pre_dec:
 529    case nir_intrinsic_store_ssbo:
 530       m_sel.info.writes_memory = true;
 531       /* fallthrough */
 532    case nir_intrinsic_load_ssbo:
 533       return m_ssbo_instr.emit(&instr->instr);
 534       break;
 535    case nir_intrinsic_copy_deref:
 536    case nir_intrinsic_load_constant:
 537    case nir_intrinsic_load_input:
 538    case nir_intrinsic_store_output:
 539    case nir_intrinsic_load_tcs_in_param_base_r600:
 540       return emit_load_tcs_param_base(instr, 0);
 541    case nir_intrinsic_load_tcs_out_param_base_r600:
 542       return emit_load_tcs_param_base(instr, 16);
 543    case nir_intrinsic_load_local_shared_r600:
 544       return emit_load_local_shared(instr);
 545    case nir_intrinsic_store_local_shared_r600:
 546       return emit_store_local_shared(instr);
 547    case nir_intrinsic_control_barrier:
 548    case nir_intrinsic_memory_barrier_tcs_patch:
 549       return emit_barrier(instr);
 550
 551    default:
 552       fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
 553       return false;
 554    }
 555    return false;
 556 }
 557
 558 bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
 559 {
 560    return false;
 561 }
 562
 563 bool
 564 ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
 565 {
 566    return false;
 567 }
 568
 569 bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
 570 {
 571    AluInstruction *ir = new AluInstruction(op0_group_barrier);
 572    ir->set_flag(alu_last_instr);
 573    emit_instruction(ir);
 574    return true;
 575 }
 576
 577
 578 bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
 579 {
 580    if (!dest.is_ssa) {
 581       auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
 582       if (as_last)
 583          ir->set_flag(alu_last_instr);
 584       emit_instruction(ir);
 585    } else {
 586       inject_register(dest.ssa.index, chan, value, true);
 587    }
 588    return true;
 589 }
 590
 591 bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
 592 {
 593    PValue address = from_nir(instr->src[1], 0, 0);
 594
 595    auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
 596          swizzle_from_comps(instr->num_components));
 597
 598    int writemask = nir_intrinsic_write_mask(instr);
 599    int align = nir_intrinsic_align_mul(instr);
 600    int align_offset = nir_intrinsic_align_offset(instr);
 601
 602    WriteScratchInstruction *ir = nullptr;
 603    if (address->type() == Value::literal) {
 604       const auto& lv = static_cast<const LiteralValue&>(*address);
 605       ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
 606    } else {
 607       address = from_nir_with_fetch_constant(instr->src[1], 0);
 608       ir = new WriteScratchInstruction(address, value, align, align_offset,
 609                                        writemask, m_scratch_size);
 610    }
 611    emit_instruction(ir);
 612    sh_info().needs_scratch_space = 1;
 613    return true;
 614 }
 615
 616 bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
 617 {
 618    PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
 619    std::array<PValue, 4> dst_val;
 620    for (int i = 0; i < 4; ++i)
 621       dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
 622
 623    GPRVector dst(dst_val);
 624    auto ir = new LoadFromScratch(dst, address, m_scratch_size);
 625    ir->prelude_append(new WaitAck(0));
 626    emit_instruction(ir);
 627    sh_info().needs_scratch_space = 1;
 628    return true;
 629 }
 630
 631 GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
 632                                                                    unsigned mask,
 633                                                                    const GPRVector::Swizzle& swizzle,
 634                                                                    bool match)
 635 {
 636    bool use_same = true;
 637    GPRVector::Values v;
 638
 639    for (int i = 0; i < 4 && use_same; ++i)  {
 640       if ((1 << i) & mask) {
 641          if (swizzle[i] < 4) {
 642             v[i] = from_nir(src, swizzle[i]);
 643             assert(v[i]);
 644             if (v[i]->type() != Value::gpr)
 645                use_same = false;
 646             if (match && (v[i]->chan() != swizzle[i]))
 647                 use_same = false;
 648          }
 649       }
 650    }
 651
 652    if (use_same) {
 653       int i = 0;
 654       while (!v[i] && i < 4) ++i;
 655       assert(i < 4);
 656
 657       unsigned sel = v[i]->sel();
 658       for (i = 0; i < 4 && use_same; ++i) {
 659          if (!v[i])
 660             v[i] = PValue(new GPRValue(sel, swizzle[i]));
 661          else
 662             use_same &= v[i]->sel() == sel;
 663       }
 664    }
 665
 666    if (!use_same) {
 667       AluInstruction *ir = nullptr;
 668       int sel = allocate_temp_register();
 669       for (int i = 0; i < 4; ++i) {
 670          v[i] = PValue(new GPRValue(sel, swizzle[i]));
 671          if (swizzle[i] < 4 && (mask & (1 << i))) {
 672             ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
 673                                     EmitInstruction::write);
 674             emit_instruction(ir);
 675          }
 676       }
 677       if (ir)
 678          ir->set_flag(alu_last_instr);
 679    }
 680    return GPRVector(v);;
 681 }
 682
 683 bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
 684 {
 685    nir_src& src0 = instr->src[0];
 686    nir_src& src1 = instr->src[1];
 687
 688    int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
 689    const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);
 690
 691    int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
 692    const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
 693    if (literal0) {
 694       if (literal1) {
 695          uint bufid = literal0->value[0].u32;
 696          uint buf_ofs = literal1->value[0].u32 >> 4;
 697          int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
 698          AluInstruction *ir = nullptr;
 699          for (int i = 0; i < instr->num_components; ++i) {
 700             int cmp = buf_cmp + i;
 701             assert(cmp < 4);
 702             auto u = PValue(new UniformValue(512 +  buf_ofs, cmp, bufid + 1));
 703             if (instr->dest.is_ssa)
 704                add_uniform((instr->dest.ssa.index << 2) + i, u);
 705             else {
 706                ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
 707                emit_instruction(ir);
 708             }
 709          }
 710          if (ir)
 711             ir->set_flag(alu_last_instr);
 712          return true;
 713
 714       } else {
 715          /* literal0 is lost ...*/
 716          return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
 717       }
 718    } else {
 719       /* TODO: This can also be solved by using the CF indes on the ALU block, and
 720        * this would probably make sense when there are more then one loads with
 721        * the same buffer ID. */
 722       PValue bufid = from_nir(instr->src[0], 0, 0);
 723       PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
 724       GPRVector trgt;
 725       for (int i = 0; i < 4; ++i)
 726          trgt.set_reg_i(i, from_nir(instr->dest, i));
 727
 728       auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
 729                                      1, bufid, bim_zero);
 730
 731       emit_instruction(ir);
 732       for (int i = 0; i < instr->num_components ; ++i) {
 733          add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
 734       }
 735       m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
 736       return true;
 737    }
 738
 739 }
 740
 741 bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
 742 {
 743    r600::sfn_log << SfnLog::instr << "emit '"
 744                  << *reinterpret_cast<nir_instr*>(instr)
 745                  << "' (" << __func__ << ")\n";
 746
 747    if (instr->intrinsic == nir_intrinsic_discard_if) {
 748       emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
 749                           {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
 750
 751    } else {
 752       emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
 753                        {Value::zero, Value::zero}, {alu_last_instr}));
 754    }
 755    m_sh_info.uses_kill = 1;
 756    return true;
 757 }
 758
 759 bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
 760                                                    nir_intrinsic_instr* instr)
 761 {
 762    return do_emit_load_deref(var, instr);
 763 }
 764
 765 bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
 766 {
 767    r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
 768                  << *reinterpret_cast<nir_instr*>(instr)
 769                  << "'\n";
 770
 771
 772    /* If the target register is a SSA register and the loading is not
 773     * indirect then we can do lazy loading, i.e. the uniform value can
 774     * be used directly. Otherwise we have to load the data for real
 775     * rigt away.
 776     */
 777
 778    /* Try to find the literal that defines the array index */
 779    const nir_load_const_instr* literal = nullptr;
 780    if (instr->src[0].is_ssa)
 781       literal = get_literal_constant(instr->src[0].ssa->index);
 782
 783    int base = nir_intrinsic_base(instr);
 784    if (literal) {
 785       AluInstruction *ir = nullptr;
 786
 787       for (int i = 0; i < instr->num_components ; ++i) {
 788          PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
 789          sfn_log << SfnLog::io << "uniform "
 790                  << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
 791
 792          if (instr->dest.is_ssa)
 793             add_uniform((instr->dest.ssa.index << 2) + i, u);
 794          else {
 795             ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
 796                                                    u, {alu_write});
 797              emit_instruction(ir);
 798          }
 799       }
 800       if (ir)
 801          ir->set_flag(alu_last_instr);
 802    } else {
 803       PValue addr = from_nir(instr->src[0], 0, 0);
 804       return load_uniform_indirect(instr, addr, 16 * base, 0);
 805    }
 806    return true;
 807 }
 808
 809 bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
 810 {
 811    if (!addr) {
 812       std::cerr << "r600-nir: don't know how uniform is addressed\n";
 813       return false;
 814    }
 815
 816    GPRVector trgt;
 817    for (int i = 0; i < 4; ++i)
 818       trgt.set_reg_i(i, from_nir(instr->dest, i));
 819
 820    if (addr->type() != Value::gpr) {
 821       emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
 822       addr = trgt.reg_i(0);
 823    }
 824
 825    /* FIXME: buffer index and index mode are not set correctly */
 826    auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
 827                                   bufferid, PValue(), bim_none);
 828    emit_instruction(ir);
 829    for (int i = 0; i < instr->num_components ; ++i) {
 830       add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
 831    }
 832    m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
 833    return true;
 834 }
 835
 836 AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
 837 {
 838    AluInstruction *ir = nullptr;
 839    for (int i = 0; i < literal->def.num_components ; ++i) {
 840       if (writemask & (1 << i)){
 841          PValue lsrc;
 842          switch (literal->def.bit_size) {
 843
 844          case 1:
 845             sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
 846             lsrc = literal->value[i].b ?
 847                      PValue(new LiteralValue( 0xffffffff, i)) :
 848                      Value::zero;
 849             break;
 850          case 32:
 851             sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
 852             if (literal->value[i].u32 == 0)
 853                lsrc = Value::zero;
 854             else if (literal->value[i].u32 == 1)
 855                lsrc = Value::one_i;
 856             else if (literal->value[i].f32 == 1.0f)
 857                lsrc = Value::one_f;
 858             else if (literal->value[i].f32 == 0.5f)
 859                lsrc = Value::zero_dot_5;
 860             else
 861                lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
 862             break;
 863          default:
 864             sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
 865                     << " falling back to 32 bit\n";
 866             lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
 867          }
 868          ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
 869
 870          emit_instruction(ir);
 871       }
 872    }
 873    return ir;
 874 }
 875
 876 PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
 877 {
 878    PValue value = from_nir(src, component);
 879    if (value->type() != Value::gpr &&
 880        value->type() != Value::gpr_vector &&
 881        value->type() != Value::gpr_array_value) {
 882       PValue retval = get_temp_register();
 883       emit_instruction(new AluInstruction(op1_mov, retval, value,
 884                                           EmitInstruction::last_write));
 885       value = retval;
 886    }
 887    return value;
 888 }
 889
 890 bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
 891 {
 892    auto out_var = get_deref_location(instr->src[0]);
 893    if (!out_var)
 894       return false;
 895
 896    return do_emit_store_deref(out_var, instr);
 897 }
 898
 899 bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
 900 {
 901    r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
 902                  << *reinterpret_cast<nir_instr*>(instr)
 903                  << "'\n";
 904
 905    /* Give the specific shader type a chance to process this, i.e. Geometry and
 906     * tesselation shaders need specialized deref_array, for the other shaders
 907     * it is lowered.
 908     */
 909    if (emit_deref_instruction_override(instr))
 910       return true;
 911
 912    switch (instr->deref_type) {
 913    case nir_deref_type_var:
 914       set_var_address(instr);
 915       return true;
 916    case nir_deref_type_array:
 917    case nir_deref_type_array_wildcard:
 918    case nir_deref_type_struct:
 919    case nir_deref_type_cast:
 920    default:
 921       fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
 922    }
 923    return false;
 924 }
 925
 926 void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
 927 {
 928    AluInstruction *ir = nullptr;
 929    PValue sv[4];
 930
 931    assert(src.src.is_ssa);
 932
 933    for (int i = 0; i < src.src.ssa->num_components ; ++i)  {
 934       unsigned uindex = (src.src.ssa->index << 2) + i;
 935       sv[i] = uniform(uindex);
 936       assert(sv[i]);
 937    }
 938
 939    for (int i = 0; i < src.src.ssa->num_components ; ++i) {
 940       ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
 941                               EmitInstruction::write);
 942       emit_instruction(ir);
 943    }
 944    if (ir)
 945       ir->set_flag(alu_last_instr);
 946 }
 947
 948
 949
 950 bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
 951                                               std::vector<PValue> srcs,
 952                                               const std::set<AluModifiers>& m_flags)
 953 {
 954    AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
 955    emit_instruction(ir);
 956    return true;
 957 }
 958
 959 void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
 960 {
 961    m_output_register_map[loc] = gpr;
 962 }
 963
 964 void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
 965 {
 966    r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
 967    m_export_output.emit(PInstruction(ir));
 968 }
 969
 970 const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
 971 {
 972    const GPRVector *retval = nullptr;
 973    auto val = m_output_register_map.find(location);
 974    if (val != m_output_register_map.end())
 975       retval =  val->second;
 976    return retval;
 977 }
 978
 979 void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
 980 {
 981    r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var <<  "\n";
 982    m_inputs[pos] = var;
 983 }
 984
 985 void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
 986 {
 987    r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var <<  "\n";
 988    m_outputs[pos] = var;
 989 }
 990
 991 void ShaderFromNirProcessor::append_block(int nesting_change)
 992 {
 993    m_nesting_depth += nesting_change;
 994    m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
 995 }
 996
 997 void ShaderFromNirProcessor::finalize()
 998 {
 999    do_finalize();
1000
1001    for (auto& i : m_inputs)
1002       m_sh_info.input[i.first].gpr = i.second->sel();
1003
1004    for (auto& i : m_outputs)
1005       m_sh_info.output[i.first].gpr = i.second->sel();
1006
1007    m_output.push_back(m_export_output);
1008 }
1009
1010 }