src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp

   1 /* -*- mesa-c++  -*-
   2  *
   3  * Copyright (c) 2018 Collabora LTD
   4  *
   5  * Author: Gert Wollny <gert.wollny@collabora.com>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * on the rights to use, copy, modify, merge, publish, distribute, sub
  11  * license, and/or sell copies of the Software, and to permit persons to whom
  12  * the Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28 #include "sfn_emitaluinstruction.h"
  29 #include "sfn_debug.h"
  30
  31 #include "gallium/drivers/r600/r600_shader.h"
  32
  33 namespace r600 {
  34
  35 using std::vector;
  36
  37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
  38    EmitInstruction (processor)
  39 {
  40
  41 }
  42
/* Translate one NIR ALU instruction into backend ALU instruction(s).
 *
 * The instruction is logged (together with the destination bit size),
 * split_constants() is run on it, and the opcode is then dispatched to the
 * matching emit_* helper.
 *
 * Returns the helper's result, or false when the opcode has no case here.
 */
bool EmitAluInstruction::do_emit(nir_instr* ir)
{
   const nir_alu_instr& instr = *nir_instr_as_alu(ir);

   r600::sfn_log << SfnLog::instr << "emit '"
                 << *ir
                 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
                 << "' (" << __func__ << ")\n";

   // Runs for every opcode before dispatch, including ones that end up
   // unhandled by the switch below.
   split_constants(instr);

   switch (instr.op) {
   case nir_op_b2f32: return emit_alu_b2f(instr);
   case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
   case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
   case nir_op_b2b1:
   case nir_op_mov:return emit_mov(instr);
   case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
   // fabs/fneg/fsat are emitted as a plain mov carrying the matching
   // source/destination modifier flag.
   case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
   case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
   case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
   case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
   case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
   case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
   case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
   case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
   case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);

   case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
   case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
   case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
   case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
   case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
   case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);

   case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
   case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
   case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);

   case nir_op_fsign: return emit_fsign(instr);
   case nir_op_fdph:  return emit_fdph(instr);

   case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
   case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
   case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
   case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
   case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);

   case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
   case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
   case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
   case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
   case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
   // "less than" is emitted as "greater than" with op2_opt_reverse, which
   // makes emit_alu_op2 swap the two sources.
   case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
   case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
   case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
   case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
   case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
   case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
   case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
   case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
   case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
   case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
   case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
   case nir_op_iabs: return emit_alu_iabs(instr);
   case nir_op_ineg: return emit_alu_ineg(instr);
   case nir_op_idiv: return emit_alu_div_int(instr, true, false);
   case nir_op_udiv: return emit_alu_div_int(instr, false, false);
   case nir_op_umod: return emit_alu_div_int(instr, false, true);
   case nir_op_isign: return emit_alu_isign(instr);

   case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
   case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
   case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);

   case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);

   case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
   case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
   case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);

   case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
   case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
   case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
   case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
   case nir_op_fadd: return emit_alu_op2(instr, op2_add);
   // Subtraction is an add whose second source gets the negate option.
   case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
   case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
   case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
   case nir_op_fdot2: return emit_dot(instr, 2);
   case nir_op_fdot3: return emit_dot(instr, 3);
   case nir_op_fdot4: return emit_dot(instr, 4);

   case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
   case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
   case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);

   case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
   case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
   case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);

   // The two-component float variants take the *_dx10 compare and go through
   // emit_any_all_fcomp2; the 3/4-component variants take the non-dx10
   // compare and go through emit_any_all_fcomp.
   case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
   case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
   case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);

   case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
   case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
   case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);


   case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
   // NOTE(review): {0, 2, 1} presumably reorders the NIR sources for
   // cnde_int; emit_alu_op3 is not visible here -- confirm.
   case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
   case nir_op_vec2: return emit_create_vec(instr, 2);
   case nir_op_vec3: return emit_create_vec(instr, 3);
   case nir_op_vec4: return emit_create_vec(instr, 4);

   case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
   case nir_op_ufind_msb: return emit_find_msb(instr, false);
   case nir_op_ifind_msb: return emit_find_msb(instr, true);
   case nir_op_b2i32: return emit_b2i32(instr);
   case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
   case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
   case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
   case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
   case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
   case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);


   /* These are in the ALU instruction list, but they should be texture instructions */
   case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
   case nir_op_fddx_coarse:
   case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);

   case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
   case nir_op_fddy_coarse:
   case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);

   case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
   case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
   default:
      // Opcode not handled by this emitter.
      return false;
   }
}
 186
 187 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
 188 {
 189     const nir_op_info *op_info = &nir_op_infos[instr.op];
 190     if (op_info->num_inputs < 2)
 191        return;
 192
 193     int nconst = 0;
 194     std::array<const UniformValue *,4> c;
 195     std::array<int,4> idx;
 196     for (unsigned i = 0; i < op_info->num_inputs; ++i) {
 197        PValue src = from_nir(instr.src[i], 0);
 198        assert(src);
 199        if (src->type() == Value::kconst) {
 200           c[nconst] = static_cast<const UniformValue *>(src.get());
 201
 202           idx[nconst++] = i;
 203        }
 204     }
 205     if (nconst < 2)
 206        return;
 207
 208     unsigned sel = c[0]->sel();
 209     unsigned kcache =  c[0]->kcache_bank();
 210     sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
 211
 212     for (int i = 1; i < nconst; ++i) {
 213        sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
 214        if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
 215           load_uniform(instr.src[idx[i]]);
 216        }
 217     }
 218 }
 219
 220 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
 221 {
 222    if (instr.src[0].negate || instr.src[0].abs) {
 223       std::cerr << "source modifiers not supported with int ops\n";
 224       return false;
 225    }
 226
 227    AluInstruction *ir = nullptr;
 228    for (int i = 0; i < 4 ; ++i) {
 229       if (instr.dest.write_mask & (1 << i)){
 230          ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
 231                                  from_nir(instr.src[0], i), write);
 232          emit_instruction(ir);
 233       }
 234    }
 235    if (ir)
 236       ir->set_flag(alu_last_instr);
 237    return true;
 238 }
 239
 240 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
 241                                       const AluOpFlags& flags)
 242 {
 243    AluInstruction *ir = nullptr;
 244    for (int i = 0; i < 4 ; ++i) {
 245       if (instr.dest.write_mask & (1 << i)){
 246          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 247                                  from_nir(instr.src[0], i), write);
 248
 249          if (flags.test(alu_src0_abs) || instr.src[0].abs)
 250             ir->set_flag(alu_src0_abs);
 251
 252          if (instr.src[0].negate ^ flags.test(alu_src0_neg))
 253             ir->set_flag(alu_src0_neg);
 254
 255          if (flags.test(alu_dst_clamp) || instr.dest.saturate)
 256              ir->set_flag(alu_dst_clamp);
 257
 258          emit_instruction(ir);
 259       }
 260    }
 261    make_last(ir);
 262
 263    return true;
 264 }
 265
 266 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
 267 {
 268    /* If the op is a plain move beween SSA values we can just forward
 269     * the register reference to the original register */
 270    if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
 271        !instr.src[0].abs && !instr.src[0].negate  && !instr.dest.saturate) {
 272       bool result = true;
 273       for (int i = 0; i < 4 ; ++i) {
 274          if (instr.dest.write_mask & (1 << i)){
 275             auto src = from_nir(instr.src[0], i);
 276             result &= inject_register(instr.dest.dest.ssa.index, i,
 277                                       src, true);
 278
 279             if (src->type() == Value::kconst) {
 280                add_uniform((instr.dest.dest.ssa.index << 2) + i, src);
 281             }
 282          }
 283       }
 284       return result;
 285    } else {
 286       return emit_alu_op1(instr, op1_mov);
 287    }
 288 }
 289
 290 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
 291 {
 292    // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
 293    // then shift back
 294
 295    const float inv_2_pi = 0.15915494f;
 296
 297    PValue v[4]; // this might need some additional temp register creation
 298    for (unsigned i = 0; i < 4 ; ++i)
 299       v[i] = from_nir(instr.dest, i);
 300
 301    PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
 302    AluInstruction *ir = nullptr;
 303    for (unsigned i = 0; i < 4 ; ++i) {
 304       if (!(instr.dest.write_mask & (1 << i)))
 305          continue;
 306       ir = new AluInstruction(op3_muladd_ieee, v[i],
 307                               {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
 308                               {alu_write});
 309       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 310       emit_instruction(ir);
 311    }
 312    make_last(ir);
 313
 314    for (unsigned i = 0; i < 4 ; ++i) {
 315       if (!(instr.dest.write_mask & (1 << i)))
 316          continue;
 317       ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
 318       emit_instruction(ir);
 319    }
 320    make_last(ir);
 321
 322    for (unsigned i = 0; i < 4 ; ++i) {
 323       if (!(instr.dest.write_mask & (1 << i)))
 324          continue;
 325       ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
 326       ir->set_flag(alu_src1_neg);
 327       emit_instruction(ir);
 328    }
 329    make_last(ir);
 330
 331    for (unsigned i = 0; i < 4 ; ++i) {
 332       if (!(instr.dest.write_mask & (1 << i)))
 333          continue;
 334
 335       ir = new AluInstruction(opcode, v[i], v[i], last_write);
 336       emit_instruction(ir);
 337    }
 338    return true;
 339 }
 340
 341 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
 342                                             bool absolute)
 343 {
 344    AluInstruction *ir = nullptr;
 345    std::set<int> src_idx;
 346
 347    if (get_chip_class() == CAYMAN) {
 348       int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
 349       for (int i = 0; i < last_slot; ++i) {
 350          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 351                                  from_nir(instr.src[0], 0), instr.dest.write_mask & (1 << i) ? write : empty);
 352          if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
 353          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 354          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 355
 356          if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
 357
 358          emit_instruction(ir);
 359       }
 360    } else {
 361       for (int i = 0; i < 4 ; ++i) {
 362          if (instr.dest.write_mask & (1 << i)){
 363             ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 364                                     from_nir(instr.src[0], i), last_write);
 365             if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
 366             if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 367             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 368             emit_instruction(ir);
 369          }
 370       }
 371    }
 372    return true;
 373 }
 374
 375 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
 376 {
 377    AluInstruction *ir = nullptr;
 378    std::array<PValue, 4> v;
 379
 380    for (int i = 0; i < 4; ++i) {
 381       if (!(instr.dest.write_mask & (1 << i)))
 382          continue;
 383       v[i] = from_nir(instr.dest, i);
 384       ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
 385       if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
 386       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 387       emit_instruction(ir);
 388    }
 389    make_last(ir);
 390
 391    for (int i = 0; i < 4; ++i) {
 392       if (!(instr.dest.write_mask & (1 << i)))
 393          continue;
 394       ir = new AluInstruction(op, v[i], v[i], {alu_write});
 395       emit_instruction(ir);
 396       if (op == op1_flt_to_uint)
 397          make_last(ir);
 398    }
 399    make_last(ir);
 400
 401    return true;
 402 }
 403
 404 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
 405 {
 406    int sel_tmp = allocate_temp_register();
 407    int sel_tmp2 = allocate_temp_register();
 408    GPRVector tmp(sel_tmp, {0,1,2,3});
 409    GPRVector tmp2(sel_tmp2, {0,1,2,3});
 410    AluInstruction *ir = nullptr;
 411    EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
 412    for (int i = 0; i < 4; ++i) {
 413       if (!(instr.dest.write_mask & (1 << i)))
 414          continue;
 415
 416       ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
 417       emit_instruction(ir);
 418    }
 419    make_last(ir);
 420
 421    for (int i = 0; i < 4 ; ++i) {
 422       if (!(instr.dest.write_mask & (1 << i)))
 423          continue;
 424
 425       ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
 426                               PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
 427       emit_instruction(ir);
 428    }
 429    make_last(ir);
 430
 431    for (int i = 0; i < 4 ; ++i) {
 432       if (!(instr.dest.write_mask & (1 << i)))
 433          continue;
 434
 435       ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
 436                               tmp2.reg_i(i), tmp.reg_i(i), write);
 437       emit_instruction(ir);
 438    }
 439    make_last(ir);
 440
 441    return true;
 442 }
 443
 444 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
 445 {
 446    AluInstruction *ir = nullptr;
 447    for (int i = 0; i < 4 ; ++i) {
 448       if (!(instr.dest.write_mask & (1 << i)))
 449          continue;
 450
 451       ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
 452                               from_nir(instr.src[0], i), Value::one_i, write);
 453      emit_instruction(ir);
 454    }
 455    make_last(ir);
 456
 457    return true;
 458 }
 459
 460 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
 461 {
 462    AluInstruction *ir = nullptr;
 463    for (unsigned i = 0; i < 2; ++i) {
 464       if (!(instr.dest.write_mask & (1 << i)))
 465          continue;
 466      ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
 467                              from_nir(instr.src[0], i), write);
 468      emit_instruction(ir);
 469    }
 470    ir->set_flag(alu_last_instr);
 471    return true;
 472 }
 473
 474 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
 475 {
 476    emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
 477                                        from_nir(instr.src[0], comp), last_write));
 478    return true;
 479 }
 480
 481 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
 482 {
 483    AluInstruction *ir = nullptr;
 484    std::set<int> src_slot;
 485    for(unsigned i = 0; i < nc; ++i) {
 486       if (instr.dest.write_mask & (1 << i)){
 487          auto src = from_nir(instr.src[i], 0);
 488          ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
 489          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 490
 491          // FIXME: This is a rather crude approach to fix the problem that
 492          // r600 can't read from four different slots of the same component
 493          // here we check only for the register index
 494          if (src->type() == Value::gpr)
 495             src_slot.insert(src->sel());
 496          if (src_slot.size() >= 3) {
 497             src_slot.clear();
 498             ir->set_flag(alu_last_instr);
 499          }
 500          emit_instruction(ir);
 501       }
 502    }
 503    if (ir)
 504       ir->set_flag(alu_last_instr);
 505    return true;
 506 }
 507
 508 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
 509 {
 510    const nir_alu_src& src0 = instr.src[0];
 511    const nir_alu_src& src1 = instr.src[1];
 512
 513    AluInstruction *ir = nullptr;
 514    for (int i = 0; i < n ; ++i) {
 515       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 516             from_nir(src0, i), from_nir(src1, i),
 517                               instr.dest.write_mask & (1 << i) ? write : empty);
 518
 519       if (src0.negate) ir->set_flag(alu_src0_neg);
 520       if (src0.abs) ir->set_flag(alu_src0_abs);
 521       if (src1.negate) ir->set_flag(alu_src1_neg);
 522       if (src1.abs) ir->set_flag(alu_src1_abs);
 523
 524       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 525       emit_instruction(ir);
 526    }
 527    for (int i = n; i < 4 ; ++i) {
 528       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 529                               Value::zero, Value::zero,
 530                               instr.dest.write_mask & (1 << i) ? write : empty);
 531       emit_instruction(ir);
 532    }
 533
 534    if (ir)
 535       ir->set_flag(alu_last_instr);
 536    return true;
 537 }
 538
 539 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
 540 {
 541    const nir_alu_src& src0 = instr.src[0];
 542    const nir_alu_src& src1 = instr.src[1];
 543
 544    AluInstruction *ir = nullptr;
 545    for (int i = 0; i < 3 ; ++i) {
 546       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 547                               from_nir(src0, i), from_nir(src1, i),
 548                               instr.dest.write_mask & (1 << i) ? write : empty);
 549       if (src0.negate) ir->set_flag(alu_src0_neg);
 550       if (src0.abs) ir->set_flag(alu_src0_abs);
 551       if (src1.negate) ir->set_flag(alu_src1_neg);
 552       if (src1.abs) ir->set_flag(alu_src1_abs);
 553       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 554       emit_instruction(ir);
 555    }
 556
 557    ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
 558                            from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
 559    if (src1.negate) ir->set_flag(alu_src1_neg);
 560    if (src1.abs) ir->set_flag(alu_src1_abs);
 561    emit_instruction(ir);
 562
 563    ir->set_flag(alu_last_instr);
 564    return true;
 565
 566 }
 567
 568 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
 569 {
 570    AluInstruction *ir = nullptr;
 571    for (int i = 0; i < 4 ; ++i) {
 572       if (instr.dest.write_mask & (1 << i)) {
 573          ir = new AluInstruction(op, from_nir(instr.dest, i),
 574                                  from_nir(instr.src[0], i), Value::zero,
 575                                  write);
 576          emit_instruction(ir);
 577       }
 578    }
 579    if (ir)
 580       ir->set_flag(alu_last_instr);
 581    return true;
 582 }
 583
 584 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
 585 {
 586    AluInstruction *ir = nullptr;
 587    for (int i = 0; i < 4 ; ++i) {
 588       if (instr.dest.write_mask & (1 << i)){
 589          ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
 590                                  from_nir(instr.src[0], i), Value::one_f, write);
 591          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 592          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
 593          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 594          emit_instruction(ir);
 595       }
 596    }
 597    if (ir)
 598       ir->set_flag(alu_last_instr);
 599    return true;
 600 }
 601
 602 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
 603 {
 604
 605    AluInstruction *ir = nullptr;
 606    PValue v[4]; // this might need some additional temp register creation
 607    for (unsigned i = 0; i < 4 ; ++i)
 608       v[i] = from_nir(instr.dest, i);
 609
 610    EAluOp combine = all ? op2_and_int : op2_or_int;
 611
 612    /* For integers we can not use the modifiers, so this needs some emulation */
 613    /* Should actually be lowered with NIR */
 614    if (instr.src[0].negate == instr.src[1].negate &&
 615        instr.src[0].abs == instr.src[1].abs) {
 616
 617       for (unsigned i = 0; i < nc ; ++i) {
 618          ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
 619                from_nir(instr.src[1], i), write);
 620          emit_instruction(ir);
 621       }
 622       if (ir)
 623          ir->set_flag(alu_last_instr);
 624    } else {
 625       std::cerr << "Negate in iequal/inequal not (yet) supported\n";
 626       return false;
 627    }
 628
 629    for (unsigned i = 0; i < nc/2 ; ++i) {
 630       ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
 631       emit_instruction(ir);
 632    }
 633    if (ir)
 634       ir->set_flag(alu_last_instr);
 635
 636    if (nc > 2) {
 637       ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
 638       emit_instruction(ir);
 639    }
 640
 641    return true;
 642 }
 643
 644 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
 645 {
 646    AluInstruction *ir = nullptr;
 647    PValue v[4]; // this might need some additional temp register creation
 648    for (unsigned i = 0; i < 4 ; ++i)
 649       v[i] = from_nir(instr.dest, i);
 650
 651    for (unsigned i = 0; i < nc ; ++i) {
 652       ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
 653             from_nir(instr.src[1],i), write);
 654
 655       if (instr.src[0].abs)
 656          ir->set_flag(alu_src0_abs);
 657       if (instr.src[0].negate)
 658          ir->set_flag(alu_src0_neg);
 659
 660       if (instr.src[1].abs)
 661          ir->set_flag(alu_src1_abs);
 662       if (instr.src[1].negate)
 663          ir->set_flag(alu_src1_neg);
 664
 665       emit_instruction(ir);
 666    }
 667    if (ir)
 668       ir->set_flag(alu_last_instr);
 669
 670    for (unsigned i = 0; i < nc ; ++i) {
 671       ir = new AluInstruction(op1_max4, v[i], v[i], write);
 672       if (all) ir->set_flag(alu_src0_neg);
 673       emit_instruction(ir);
 674    }
 675
 676    for (unsigned i = nc; i < 4 ; ++i) {
 677       ir = new AluInstruction(op1_max4, v[i],
 678                               all ? Value::one_f : Value::zero, write);
 679       if (all)
 680          ir->set_flag(alu_src0_neg);
 681
 682       emit_instruction(ir);
 683    }
 684
 685    ir->set_flag(alu_last_instr);
 686
 687    if (all)
 688       op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
 689    else
 690       op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
 691
 692    ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
 693    if (all)
 694       ir->set_flag(alu_src1_neg);
 695    emit_instruction(ir);
 696
 697    return true;
 698 }
 699
 700 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
 701 {
 702    AluInstruction *ir = nullptr;
 703    PValue v[4]; // this might need some additional temp register creation
 704    for (unsigned i = 0; i < 4 ; ++i)
 705       v[i] = from_nir(instr.dest, i);
 706
 707    for (unsigned i = 0; i < 2 ; ++i) {
 708       ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
 709             from_nir(instr.src[1],i), write);
 710       if (instr.src[0].abs)
 711          ir->set_flag(alu_src0_abs);
 712       if (instr.src[0].negate)
 713          ir->set_flag(alu_src0_neg);
 714
 715       if (instr.src[1].abs)
 716          ir->set_flag(alu_src1_abs);
 717       if (instr.src[1].negate)
 718          ir->set_flag(alu_src1_neg);
 719
 720       emit_instruction(ir);
 721    }
 722    if (ir)
 723       ir->set_flag(alu_last_instr);
 724
 725    op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
 726    ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
 727    emit_instruction(ir);
 728
 729    return true;
 730 }
 731
 732 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
 733 {
 734    const nir_alu_src& src0 = instr.src[0];
 735    const nir_alu_src& src1 = instr.src[1];
 736
 737    AluInstruction *ir = nullptr;
 738
 739    if (get_chip_class() == CAYMAN) {
 740       int lasti = util_last_bit(instr.dest.write_mask);
 741       for (int k = 0; k < lasti ; ++k) {
 742          if (instr.dest.write_mask & (1 << k)) {
 743
 744             for (int i = 0; i < 4; i++) {
 745                ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, k), from_nir(src1, k), (i == k) ? write : empty);
 746                if (src0.negate) ir->set_flag(alu_src0_neg);
 747             if (src0.abs) ir->set_flag(alu_src0_abs);
 748             if (src1.negate) ir->set_flag(alu_src1_neg);
 749             if (src1.abs) ir->set_flag(alu_src1_abs);
 750             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 751             if (i == 3) ir->set_flag(alu_last_instr);
 752             emit_instruction(ir);
 753             }
 754          }
 755       }
 756    } else {
 757       for (int i = 0; i < 4 ; ++i) {
 758          if (instr.dest.write_mask & (1 << i)){
 759             ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
 760             if (src0.negate) ir->set_flag(alu_src0_neg);
 761             if (src0.abs) ir->set_flag(alu_src0_abs);
 762             if (src1.negate) ir->set_flag(alu_src1_neg);
 763             if (src1.abs) ir->set_flag(alu_src1_abs);
 764             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 765             emit_instruction(ir);
 766          }
 767       }
 768    }
 769    return true;
 770 }
 771
 772 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
 773 {
 774
 775    const nir_alu_src& src0 = instr.src[0];
 776    const nir_alu_src& src1 = instr.src[1];
 777
 778    if (src0.negate || src1.negate ||
 779        src0.abs || src1.abs) {
 780       std::cerr << "R600: don't support modifiers with integer operations";
 781       return false;
 782    }
 783    return emit_alu_op2(instr, opcode, opts);
 784 }
 785
 786 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
 787 {
 788    const nir_alu_src *src0 = &instr.src[0];
 789    const nir_alu_src *src1 = &instr.src[1];
 790
 791    if (ops & op2_opt_reverse)
 792       std::swap(src0, src1);
 793
 794    bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
 795
 796    AluInstruction *ir = nullptr;
 797    for (int i = 0; i < 4 ; ++i) {
 798       if (instr.dest.write_mask & (1 << i)){
 799          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 800                                  from_nir(*src0, i), from_nir(*src1, i), write);
 801
 802          if (src0->negate) ir->set_flag(alu_src0_neg);
 803          if (src0->abs) ir->set_flag(alu_src0_abs);
 804          if (src1_negate) ir->set_flag(alu_src1_neg);
 805          if (src1->abs) ir->set_flag(alu_src1_abs);
 806          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 807          emit_instruction(ir);
 808       }
 809    }
 810    if (ir)
 811       ir->set_flag(alu_last_instr);
 812    return true;
 813 }
 814
 815 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
 816 {
 817    const nir_alu_src *src0 = &instr.src[0];
 818    const nir_alu_src *src1 = &instr.src[1];
 819
 820    if (ops & op2_opt_reverse)
 821       std::swap(src0, src1);
 822
 823    GPRVector::Values v0;
 824    for (int i = 0; i < 4 ; ++i)
 825       v0[i] = from_nir(*src0, i);
 826
 827    GPRVector::Values v1;
 828    for (int i = 0; i < 4 ; ++i)
 829       v1[i] = from_nir(*src1, i);
 830
 831    if (src0->abs ||   src0->negate) {
 832       int src0_tmp = allocate_temp_register();
 833       GPRVector::Values v0_temp;
 834       AluInstruction *ir = nullptr;
 835       for (int i = 0; i < 4 ; ++i) {
 836          if (instr.dest.write_mask & (1 << i)) {
 837             v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
 838             ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
 839             if (src0->abs) ir->set_flag(alu_src0_abs);
 840             if (src0->negate) ir->set_flag(alu_src0_neg);
 841             emit_instruction(ir);
 842             v0[i] = v0_temp[i];
 843          }
 844       }
 845       if (ir)
 846          ir->set_flag(alu_last_instr);
 847    }
 848
 849    if (src1->abs || src1->negate) {
 850       int src1_tmp = allocate_temp_register();
 851       GPRVector::Values v1_temp;
 852       AluInstruction *ir = nullptr;
 853       for (int i = 0; i < 4 ; ++i) {
 854          if (instr.dest.write_mask & (1 << i)) {
 855             v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
 856             ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
 857             if (src1->abs) ir->set_flag(alu_src0_abs);
 858             if (src1->negate) ir->set_flag(alu_src0_neg);
 859             emit_instruction(ir);
 860             v1[i] = v1_temp[i];
 861          }
 862       }
 863       if (ir)
 864          ir->set_flag(alu_last_instr);
 865    }
 866
 867    AluInstruction *ir = nullptr;
 868    for (int i = 0; i < 4 ; ++i) {
 869       if (instr.dest.write_mask & (1 << i)){
 870          ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
 871          emit_instruction(ir);
 872       }
 873    }
 874    if (ir)
 875       ir->set_flag(alu_last_instr);
 876    return true;
 877 }
 878
 879
 880 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
 881 {
 882    int sel_tmp = allocate_temp_register();
 883    GPRVector tmp(sel_tmp, {0,1,2,3});
 884
 885    AluInstruction *ir = nullptr;
 886    PValue help[4];
 887
 888    for (int i = 0; i < 4 ; ++i) {
 889       if (instr.dest.write_mask & (1 << i)){
 890          help[i] = from_nir(instr.dest, i);
 891          auto s = from_nir(instr.src[0], i);
 892          ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
 893          emit_instruction(ir);
 894       }
 895    }
 896    if (ir)
 897       ir->set_flag(alu_last_instr);
 898
 899    for (int i = 0; i < 4 ; ++i) {
 900       if (instr.dest.write_mask & (1 << i)){
 901          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
 902          emit_instruction(ir);
 903       }
 904    }
 905    if (ir)
 906       ir->set_flag(alu_last_instr);
 907
 908    for (int i = 0; i < 4 ; ++i) {
 909       if (instr.dest.write_mask & (1 << i)){
 910
 911          ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
 912                                  PValue(new LiteralValue(-1,0)), help[i], write);
 913          emit_instruction(ir);
 914       }
 915    }
 916    if (ir)
 917       ir->set_flag(alu_last_instr);
 918    return true;
 919 }
 920
 921 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
 922 {
 923    PValue help[4];
 924    PValue src[4];
 925    AluInstruction *ir = nullptr;
 926
 927    for (int i = 0; i < 4 ; ++i) {
 928       help[i] = from_nir(instr.dest, i);
 929       src[i] = from_nir(instr.src[0], i);
 930    }
 931
 932    if (instr.src[0].abs) {
 933
 934       for (int i = 0; i < 4 ; ++i) {
 935          if (instr.dest.write_mask & (1 << i)){
 936             ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
 937             ir->set_flag(alu_src0_abs);
 938             emit_instruction(ir);
 939          }
 940       }
 941       if (ir)
 942          ir->set_flag(alu_last_instr);
 943
 944       if (instr.src[0].negate) {
 945          for (int i = 0; i < 4 ; ++i) {
 946             if (instr.dest.write_mask & (1 << i)){
 947                ir = new AluInstruction(op1_mov, help[i], help[i], write);
 948                ir->set_flag(alu_src0_neg);
 949                emit_instruction(ir);
 950             }
 951          }
 952          if (ir)
 953             ir->set_flag(alu_last_instr);
 954       }
 955
 956       return true;
 957    }
 958
 959    for (int i = 0; i < 4 ; ++i) {
 960       if (instr.dest.write_mask & (1 << i)){
 961          ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
 962          if (instr.src[0].negate) {
 963             ir->set_flag(alu_src0_neg);
 964             ir->set_flag(alu_src2_neg);
 965          }
 966          emit_instruction(ir);
 967       }
 968    }
 969
 970    if (ir)
 971       ir->set_flag(alu_last_instr);
 972
 973    for (int i = 0; i < 4 ; ++i) {
 974       if (instr.dest.write_mask & (1 << i)){
 975          ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
 976          ir->set_flag(alu_src0_neg);
 977          ir->set_flag(alu_src1_neg);
 978          emit_instruction(ir);
 979       }
 980    }
 981    if (ir)
 982       ir->set_flag(alu_last_instr);
 983    return true;
 984 }
 985
 986 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
 987                                       std::array<uint8_t, 3> reorder)
 988 {
 989    const nir_alu_src *src[3];
 990    src[0] = &instr.src[reorder[0]];
 991    src[1] = &instr.src[reorder[1]];
 992    src[2] = &instr.src[reorder[2]];
 993
 994    AluInstruction *ir = nullptr;
 995    for (int i = 0; i < 4 ; ++i) {
 996       if (instr.dest.write_mask & (1 << i)){
 997          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 998                                  from_nir(*src[0], i), from_nir(*src[1], i),
 999                                  from_nir(*src[2], i), write);
1000
1001          if (src[0]->negate) ir->set_flag(alu_src0_neg);
1002          if (src[1]->negate) ir->set_flag(alu_src1_neg);
1003          if (src[2]->negate) ir->set_flag(alu_src2_neg);
1004
1005          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
1006          ir->set_flag(alu_write);
1007          emit_instruction(ir);
1008       }
1009    }
1010    if (ir)
1011       ir->set_flag(alu_last_instr);
1012    return true;
1013 }
1014
1015 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
1016 {
1017    AluInstruction *ir = nullptr;
1018    for (int i = 0; i < 4 ; ++i) {
1019       if (instr.dest.write_mask & (1 << i)){
1020          ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
1021                                  from_nir(instr.src[0], i), write);
1022          emit_instruction(ir);
1023       }
1024    }
1025    if (ir)
1026       ir->set_flag(alu_last_instr);
1027
1028    return true;
1029 }
1030
/* Eight-character lookup string; presumably maps a swizzle selector index to
 * its printable name (x, y, z, w, 0, 1, ?, _).  No use is visible in this
 * part of the file - verify before relying on that reading. */
static constexpr char swz[] = "xyzw01?_";
1032
1033
1034
1035 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
1036 {
1037    int sel_tmp = allocate_temp_register();
1038    GPRVector tmp(sel_tmp, {0,1,2,3});
1039
1040    std::array<PValue,4> src;
1041    AluInstruction *ir = nullptr;
1042    for (int i = 0; i < 4 ; ++i) {
1043       if (instr.dest.write_mask & (1 << i)){
1044          src[i] = from_nir(instr.src[0],i);
1045          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
1046          emit_instruction(ir);
1047       }
1048    }
1049    if (ir)
1050       ir->set_flag(alu_last_instr);
1051
1052    for (int i = 0; i < 4 ; ++i) {
1053       if (instr.dest.write_mask & (1 << i)){
1054          ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
1055                                  src[i], tmp.reg_i(i), write);
1056          emit_instruction(ir);
1057       }
1058    }
1059    if (ir)
1060       ir->set_flag(alu_last_instr);
1061    return true;
1062 }
1063
1064 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1065 {
1066
1067    int sel_tmp = allocate_temp_register();
1068    int sel_tmp0 = allocate_temp_register();
1069    int sel_tmp1 = allocate_temp_register();
1070
1071    PValue asrc1(new GPRValue(sel_tmp, 0));
1072    PValue asrc2(new GPRValue(sel_tmp, 1));
1073    PValue rsign(new GPRValue(sel_tmp, 2));
1074    PValue err(new GPRValue(sel_tmp, 3));
1075
1076    GPRVector tmp0(sel_tmp0, {0,1,2,3});
1077    GPRVector tmp1(sel_tmp1, {0,1,2,3});
1078
1079    std::array<PValue, 4> src0;
1080    std::array<PValue, 4> src1;
1081
1082    for (int i = 0; i < 4 ; ++i) {
1083       if (instr.dest.write_mask & (1 << i)) {
1084          src0[i] = from_nir(instr.src[0], i);
1085          src1[i] = from_nir(instr.src[1], i);
1086       }
1087    }
1088
1089
1090    for (int i = 3; i >= 0 ; --i) {
1091       if (!(instr.dest.write_mask & (1 << i)))
1092          continue;
1093       if (use_signed) {
1094          emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1095          emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1096          emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1097
1098
1099          emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1100          emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1101       } else {
1102          asrc1 = src0[i];
1103          asrc2 = src1[i];
1104       }
1105
1106       emit_instruction(op1_recip_uint,  tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1107
1108       emit_instruction(op2_mullo_uint,  tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1109
1110       emit_instruction(op2_sub_int,  tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1111       emit_instruction(op2_mulhi_uint,  tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1112
1113       emit_instruction(op3_cnde_int,  tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1114
1115       emit_instruction(op2_mulhi_uint,  err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1116
1117       emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1118       emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1119
1120       emit_instruction(op3_cnde_int,  tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1121
1122       emit_instruction(op2_mulhi_uint,  tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1123       emit_instruction(op2_mullo_uint,  tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1124
1125       emit_instruction(op2_sub_int,  tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1126
1127
1128       emit_instruction(op2_setge_uint,  tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1129       emit_instruction(op2_setge_uint,  tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1130
1131       if (mod) {
1132          emit_instruction(op2_sub_int,  tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1133          emit_instruction(op2_add_int,  tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1134       } else {
1135          emit_instruction(op2_add_int,  tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1136          emit_instruction(op2_sub_int,  tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1137       }
1138
1139       emit_instruction(op2_and_int,  tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1140
1141       if (mod)
1142          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1143       else
1144          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1145
1146       if (use_signed) {
1147          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1148          emit_instruction(op2_sub_int,  tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1149
1150          if (mod)
1151             emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1152                           {alu_write, alu_last_instr});
1153          else
1154             emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1155                           {alu_write, alu_last_instr});
1156       } else {
1157          emit_instruction(op3_cnde_int,  from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1158       }
1159    }
1160    return true;
1161 }
1162
1163 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1164                                              GPRVector::Values& v, int ncomp)
1165 {
1166
1167    AluInstruction *alu = nullptr;
1168    for (int i = 0; i < ncomp; ++i) {
1169       alu  = new AluInstruction(op1_mov,  v[i], s[i], {alu_write});
1170       if (src.abs)
1171          alu->set_flag(alu_src0_abs);
1172       if (src.negate)
1173          alu->set_flag(alu_src0_neg);
1174       emit_instruction(alu);
1175    }
1176    make_last(alu);
1177 }
1178
1179 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1180                                       bool fine)
1181 {
1182
1183    GPRVector::Values v;
1184    GPRVector::Values s;
1185    GPRVector::Values *source = &s;
1186    std::array<int, 4> writemask = {0,1,2,3};
1187
1188    int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1189                instr.src[0].src.reg.reg->num_components;
1190
1191    for (int i = 0; i < 4; ++i) {
1192       writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1193       v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1194       s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1195    }
1196
1197    if (instr.src[0].abs || instr.src[0].negate) {
1198       split_alu_modifiers(instr.src[0], s, v, ncomp);
1199       source = &v;
1200    }
1201
1202    /* This is querying the dreivatives of the output fb, so we would either need
1203     * access to the neighboring pixels or to the framebuffer. Neither is currently
1204     * implemented */
1205    GPRVector dst(v);
1206    GPRVector src(*source);
1207
1208    auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1209    tex->set_dest_swizzle(writemask);
1210
1211    if (fine) {
1212       std::cerr << "Sewt fine flag\n";
1213       tex->set_flag(TexInstruction::grad_fine);
1214    }
1215
1216    emit_instruction(tex);
1217
1218    return true;
1219 }
1220
1221 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1222 {
1223    int itmp = allocate_temp_register();
1224    std::array<PValue, 4> tmp;
1225    std::array<PValue, 4> dst;
1226    std::array<PValue, 4> src0;
1227    std::array<PValue, 4> shift;
1228
1229    PValue l32(new LiteralValue(32));
1230    unsigned write_mask = instr.dest.write_mask;
1231
1232    AluInstruction *ir = nullptr;
1233    for (int i = 0; i < 4; i++) {
1234       if (!(write_mask & (1<<i)))
1235                         continue;
1236       dst[i] = from_nir(instr.dest, i);
1237       src0[i] = from_nir(instr.src[0], i);
1238       shift[i] = from_nir(instr.src[2], i);
1239
1240       ir = new AluInstruction(opcode, dst[i],
1241                               {src0[i], from_nir(instr.src[1], i), shift[i]},
1242                               {alu_write});
1243       emit_instruction(ir);
1244    }
1245    make_last(ir);
1246
1247    for (int i = 0; i < 4; i++) {
1248       if (!(write_mask & (1<<i)))
1249                         continue;
1250       tmp[i] = PValue(new GPRValue(itmp, i));
1251       ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1252       {alu_write});
1253       emit_instruction(ir);
1254    }
1255    make_last(ir);
1256
1257    for (int i = 0; i < 4; i++) {
1258       if (!(write_mask & (1<<i)))
1259                         continue;
1260       ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1261                               {alu_write});
1262       emit_instruction(ir);
1263    }
1264    make_last(ir);
1265
1266    return true;
1267 }
1268
1269 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1270 {
1271    auto t0 = get_temp_vec4();
1272    auto t1 = get_temp_vec4();
1273    auto t2 = get_temp_vec4();
1274
1275    PValue l32(new LiteralValue(32));
1276    unsigned write_mask = instr.dest.write_mask;
1277    if (!write_mask) return true;
1278
1279    AluInstruction *ir = nullptr;
1280    for (int i = 0; i < 4; i++) {
1281       if (!(write_mask & (1<<i)))
1282                         continue;
1283
1284       ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1285       emit_instruction(ir);
1286    }
1287    make_last(ir);
1288
1289    for (int i = 0; i < 4; i++) {
1290       if (!(write_mask & (1<<i)))
1291                         continue;
1292       ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1293                                                    from_nir(instr.src[2], i)}, {alu_write});
1294       emit_instruction(ir);
1295    }
1296    ir->set_flag(alu_last_instr);
1297
1298    for (int i = 0; i < 4; i++) {
1299       if (!(write_mask & (1<<i)))
1300                         continue;
1301       ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1302                                                     from_nir(instr.src[2], i)}, {alu_write});
1303       emit_instruction(ir);
1304    }
1305    ir->set_flag(alu_last_instr);
1306
1307
1308    for (int i = 0; i < 4; i++) {
1309       if (!(write_mask & (1<<i)))
1310                         continue;
1311       ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1312                   {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1313       emit_instruction(ir);
1314    }
1315    ir->set_flag(alu_last_instr);
1316
1317    for (int i = 0; i < 4; i++) {
1318       if (!(write_mask & (1<<i)))
1319                         continue;
1320       ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1321                              {t0[i], from_nir(instr.dest, i),
1322                                      from_nir(instr.src[1], i)}, {alu_write});
1323       emit_instruction(ir);
1324    }
1325    ir->set_flag(alu_last_instr);
1326
1327    return true;
1328 }
1329
1330 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1331 {
1332    emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1333    {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1334    {alu_write, alu_last_instr});
1335
1336    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1337    {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1338
1339    return true;
1340 }
1341
1342 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1343 {
1344    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1345    {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1346    return true;
1347 }
1348
1349 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1350 {
1351    int it0 = allocate_temp_register();
1352    PValue x(new GPRValue(it0, 0));
1353    PValue y(new GPRValue(it0, 1));
1354
1355    emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1356    emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1357
1358    emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1359
1360    emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1361
1362    return true;
1363 }
1364
1365 }