src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp

   1 /* -*- mesa-c++  -*-
   2  *
   3  * Copyright (c) 2018 Collabora LTD
   4  *
   5  * Author: Gert Wollny <gert.wollny@collabora.com>
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the "Software"),
   9  * to deal in the Software without restriction, including without limitation
  10  * on the rights to use, copy, modify, merge, publish, distribute, sub
  11  * license, and/or sell copies of the Software, and to permit persons to whom
  12  * the Software is furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the next
  15  * paragraph) shall be included in all copies or substantial portions of the
  16  * Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  */
  26
  27
  28 #include "sfn_emitaluinstruction.h"
  29 #include "sfn_debug.h"
  30
  31 #include "gallium/drivers/r600/r600_shader.h"
  32
  33 namespace r600 {
  34
  35 using std::vector;
  36
  37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
  38    EmitInstruction (processor)
  39 {
  40
  41 }
  42
  43 bool EmitAluInstruction::do_emit(nir_instr* ir)
  44 {
  45    const nir_alu_instr& instr = *nir_instr_as_alu(ir);
  46
  47    r600::sfn_log << SfnLog::instr << "emit '"
  48                  << *ir
  49                  << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
  50                  << "' (" << __func__ << ")\n";
  51
  52    split_constants(instr);
  53
  54    switch (instr.op) {
  55    case nir_op_b2f32: return emit_alu_b2f(instr);
  56    case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
  57    case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
  58    case nir_op_b2b1:
  59    case nir_op_mov:return emit_alu_op1(instr, op1_mov);
  60    case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
  61    case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
  62    case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
  63    case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
  64    case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
  65    case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
  66    case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
  67    case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
  68    case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
  69    case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
  70
  71    case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
  72    case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
  73    case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
  74    case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
  75    case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
  76    case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
  77
  78    case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
  79    case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
  80    case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
  81
  82    case nir_op_fsign: return emit_fsign(instr);
  83    case nir_op_fdph:  return emit_fdph(instr);
  84
  85    case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
  86    case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
  87    case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
  88    case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
  89    case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
  90
  91    case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
  92    case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
  93    case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
  94    case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
  95    case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
  96    case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
  97    case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
  98    case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
  99    case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
 100    case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
 101    case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
 102    case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
 103    case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
 104    case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
 105    case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
 106    case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
 107    case nir_op_iabs: return emit_alu_iabs(instr);
 108    case nir_op_ineg: return emit_alu_ineg(instr);
 109    case nir_op_idiv: return emit_alu_div_int(instr, true, false);
 110    case nir_op_udiv: return emit_alu_div_int(instr, false, false);
 111    case nir_op_umod: return emit_alu_div_int(instr, false, true);
 112    case nir_op_isign: return emit_alu_isign(instr);
 113
 114    case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
 115    case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
 116    case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
 117
 118    case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
 119
 120    case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
 121    case nir_op_fne: return emit_alu_op2(instr, op2_setne_dx10);
 122    case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
 123
 124    case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
 125    case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
 126    case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
 127    case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
 128    case nir_op_fadd: return emit_alu_op2(instr, op2_add);
 129    case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
 130    case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
 131    case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
 132    case nir_op_fdot2: return emit_dot(instr, 2);
 133    case nir_op_fdot3: return emit_dot(instr, 3);
 134    case nir_op_fdot4: return emit_dot(instr, 4);
 135
 136    case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
 137    case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
 138    case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
 139
 140    case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
 141    case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
 142    case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
 143
 144    case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
 145    case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
 146    case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
 147
 148    case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
 149    case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
 150    case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
 151
 152
 153    case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
 154    case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
 155    case nir_op_vec2: return emit_create_vec(instr, 2);
 156    case nir_op_vec3: return emit_create_vec(instr, 3);
 157    case nir_op_vec4: return emit_create_vec(instr, 4);
 158
 159    case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
 160    case nir_op_ufind_msb: return emit_find_msb(instr, false);
 161    case nir_op_ifind_msb: return emit_find_msb(instr, true);
 162    case nir_op_b2i32: return emit_b2i32(instr);
 163    case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
 164    case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
 165    case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
 166    case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
 167    case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
 168    case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
 169
 170
 171    /* These are in the ALU instruction list, but they should be texture instructions */
 172    case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
 173    case nir_op_fddx_coarse:
 174    case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
 175
 176    case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
 177    case nir_op_fddy_coarse:
 178    case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
 179
 180    case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
 181    case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
 182    default:
 183       return false;
 184    }
 185 }
 186
 187 void EmitAluInstruction::split_constants(const nir_alu_instr& instr)
 188 {
 189     const nir_op_info *op_info = &nir_op_infos[instr.op];
 190     if (op_info->num_inputs < 2)
 191        return;
 192
 193     int nconst = 0;
 194     std::array<PValue,4> c;
 195     std::array<int,4> idx;
 196     for (unsigned i = 0; i < op_info->num_inputs; ++i) {
 197        PValue src = from_nir(instr.src[i], 0);
 198        assert(src);
 199        if (src->type() == Value::kconst) {
 200           c[nconst] = src;
 201
 202           idx[nconst++] = i;
 203        }
 204     }
 205     if (nconst < 2)
 206        return;
 207
 208     unsigned sel = c[0]->sel();
 209     sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
 210
 211     for (int i = 1; i < nconst; ++i) {
 212        sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
 213        if (c[i]->sel() != sel) {
 214           load_uniform(instr.src[idx[i]]);
 215        }
 216     }
 217 }
 218
 219 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
 220 {
 221    if (instr.src[0].negate || instr.src[0].abs) {
 222       std::cerr << "source modifiers not supported with int ops\n";
 223       return false;
 224    }
 225
 226    AluInstruction *ir = nullptr;
 227    for (int i = 0; i < 4 ; ++i) {
 228       if (instr.dest.write_mask & (1 << i)){
 229          ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
 230                                  from_nir(instr.src[0], i), write);
 231          emit_instruction(ir);
 232       }
 233    }
 234    if (ir)
 235       ir->set_flag(alu_last_instr);
 236    return true;
 237 }
 238
 239 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
 240                                       const AluOpFlags& flags)
 241 {
 242    AluInstruction *ir = nullptr;
 243    for (int i = 0; i < 4 ; ++i) {
 244       if (instr.dest.write_mask & (1 << i)){
 245          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 246                                  from_nir(instr.src[0], i), write);
 247
 248          if (flags.test(alu_src0_abs) || instr.src[0].abs)
 249             ir->set_flag(alu_src0_abs);
 250
 251          if (instr.src[0].negate ^ flags.test(alu_src0_neg))
 252             ir->set_flag(alu_src0_neg);
 253
 254          if (flags.test(alu_dst_clamp) || instr.dest.saturate)
 255              ir->set_flag(alu_dst_clamp);
 256
 257          emit_instruction(ir);
 258       }
 259    }
 260    make_last(ir);
 261
 262    return true;
 263 }
 264
 265 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
 266 {
 267    // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
 268    // then shift back
 269
 270    const float inv_2_pi = 0.15915494f;
 271
 272    PValue v[4]; // this might need some additional temp register creation
 273    for (unsigned i = 0; i < 4 ; ++i)
 274       v[i] = from_nir(instr.dest, i);
 275
 276    PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
 277    AluInstruction *ir = nullptr;
 278    for (unsigned i = 0; i < 4 ; ++i) {
 279       if (!(instr.dest.write_mask & (1 << i)))
 280          continue;
 281       ir = new AluInstruction(op3_muladd_ieee, v[i],
 282                               {from_nir(instr.src[0],i), inv_pihalf, Value::zero_dot_5},
 283                               {alu_write});
 284       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 285       emit_instruction(ir);
 286    }
 287    make_last(ir);
 288
 289    for (unsigned i = 0; i < 4 ; ++i) {
 290       if (!(instr.dest.write_mask & (1 << i)))
 291          continue;
 292       ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
 293       emit_instruction(ir);
 294    }
 295    make_last(ir);
 296
 297    for (unsigned i = 0; i < 4 ; ++i) {
 298       if (!(instr.dest.write_mask & (1 << i)))
 299          continue;
 300       ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
 301       ir->set_flag(alu_src1_neg);
 302       emit_instruction(ir);
 303    }
 304    make_last(ir);
 305
 306    for (unsigned i = 0; i < 4 ; ++i) {
 307       if (!(instr.dest.write_mask & (1 << i)))
 308          continue;
 309
 310       ir = new AluInstruction(opcode, v[i], v[i], last_write);
 311       emit_instruction(ir);
 312    }
 313    return true;
 314 }
 315
 316 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
 317                                             bool absolute)
 318 {
 319    AluInstruction *ir = nullptr;
 320    std::set<int> src_idx;
 321    for (int i = 0; i < 4 ; ++i) {
 322       if (instr.dest.write_mask & (1 << i)){
 323          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 324                                  from_nir(instr.src[0], i), last_write);
 325          if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
 326          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 327          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 328          emit_instruction(ir);
 329       }
 330    }
 331    return true;
 332 }
 333
 334 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
 335 {
 336    AluInstruction *ir = nullptr;
 337    std::array<PValue, 4> v;
 338
 339    for (int i = 0; i < 4; ++i) {
 340       if (!(instr.dest.write_mask & (1 << i)))
 341          continue;
 342       v[i] = from_nir(instr.dest, i);
 343       ir = new AluInstruction(op1_trunc, v[i], from_nir(instr.src[0], i), {alu_write});
 344       if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
 345       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 346       emit_instruction(ir);
 347    }
 348    make_last(ir);
 349
 350    for (int i = 0; i < 4; ++i) {
 351       if (!(instr.dest.write_mask & (1 << i)))
 352          continue;
 353       ir = new AluInstruction(op, v[i], v[i], {alu_write});
 354       emit_instruction(ir);
 355       if (op == op1_flt_to_uint)
 356          make_last(ir);
 357    }
 358    make_last(ir);
 359
 360    return true;
 361 }
 362
 363 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
 364 {
 365    int sel_tmp = allocate_temp_register();
 366    int sel_tmp2 = allocate_temp_register();
 367    GPRVector tmp(sel_tmp, {0,1,2,3});
 368    GPRVector tmp2(sel_tmp2, {0,1,2,3});
 369    AluInstruction *ir = nullptr;
 370    EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
 371    for (int i = 0; i < 4; ++i) {
 372       if (!(instr.dest.write_mask & (1 << i)))
 373          continue;
 374
 375       ir = new AluInstruction(opcode, tmp.reg_i(i), from_nir(instr.src[0], i), write);
 376       emit_instruction(ir);
 377    }
 378    make_last(ir);
 379
 380    for (int i = 0; i < 4 ; ++i) {
 381       if (!(instr.dest.write_mask & (1 << i)))
 382          continue;
 383
 384       ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
 385                               PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
 386       emit_instruction(ir);
 387    }
 388    make_last(ir);
 389
 390    for (int i = 0; i < 4 ; ++i) {
 391       if (!(instr.dest.write_mask & (1 << i)))
 392          continue;
 393
 394       ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
 395                               tmp2.reg_i(i), tmp.reg_i(i), write);
 396       emit_instruction(ir);
 397    }
 398    make_last(ir);
 399
 400    return true;
 401 }
 402
 403 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
 404 {
 405    AluInstruction *ir = nullptr;
 406    for (int i = 0; i < 4 ; ++i) {
 407       if (!(instr.dest.write_mask & (1 << i)))
 408          continue;
 409
 410       ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
 411                               from_nir(instr.src[0], i), Value::one_i, write);
 412      emit_instruction(ir);
 413    }
 414    make_last(ir);
 415
 416    return true;
 417 }
 418
 419 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
 420 {
 421    AluInstruction *ir = nullptr;
 422    for (unsigned i = 0; i < 2; ++i) {
 423       if (!(instr.dest.write_mask & (1 << i)))
 424          continue;
 425      ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
 426                              from_nir(instr.src[0], i), write);
 427      emit_instruction(ir);
 428    }
 429    ir->set_flag(alu_last_instr);
 430    return true;
 431 }
 432
 433 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
 434 {
 435    emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
 436                                        from_nir(instr.src[0], comp), last_write));
 437    return true;
 438 }
 439
 440 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
 441 {
 442    AluInstruction *ir = nullptr;
 443    std::set<int> src_slot;
 444    for(unsigned i = 0; i < nc; ++i) {
 445       if (instr.dest.write_mask & (1 << i)){
 446          auto src = from_nir(instr.src[i], 0);
 447          ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
 448          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 449
 450          // FIXME: This is a rather crude approach to fix the problem that
 451          // r600 can't read from four different slots of the same component
 452          // here we check only for the register index
 453          if (src->type() == Value::gpr)
 454             src_slot.insert(src->sel());
 455          if (src_slot.size() >= 3) {
 456             src_slot.clear();
 457             ir->set_flag(alu_last_instr);
 458          }
 459          emit_instruction(ir);
 460       }
 461    }
 462    if (ir)
 463       ir->set_flag(alu_last_instr);
 464    return true;
 465 }
 466
 467 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
 468 {
 469    const nir_alu_src& src0 = instr.src[0];
 470    const nir_alu_src& src1 = instr.src[1];
 471
 472    AluInstruction *ir = nullptr;
 473    for (int i = 0; i < n ; ++i) {
 474       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 475             from_nir(src0, i), from_nir(src1, i),
 476                               instr.dest.write_mask & (1 << i) ? write : empty);
 477
 478       if (src0.negate) ir->set_flag(alu_src0_neg);
 479       if (src0.abs) ir->set_flag(alu_src0_abs);
 480       if (src1.negate) ir->set_flag(alu_src1_neg);
 481       if (src1.abs) ir->set_flag(alu_src1_abs);
 482
 483       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 484       emit_instruction(ir);
 485    }
 486    for (int i = n; i < 4 ; ++i) {
 487       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 488                               Value::zero, Value::zero,
 489                               instr.dest.write_mask & (1 << i) ? write : empty);
 490       emit_instruction(ir);
 491    }
 492
 493    if (ir)
 494       ir->set_flag(alu_last_instr);
 495    return true;
 496 }
 497
 498 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
 499 {
 500    const nir_alu_src& src0 = instr.src[0];
 501    const nir_alu_src& src1 = instr.src[1];
 502
 503    AluInstruction *ir = nullptr;
 504    for (int i = 0; i < 3 ; ++i) {
 505       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
 506                               from_nir(src0, i), from_nir(src1, i),
 507                               instr.dest.write_mask & (1 << i) ? write : empty);
 508       if (src0.negate) ir->set_flag(alu_src0_neg);
 509       if (src0.abs) ir->set_flag(alu_src0_abs);
 510       if (src1.negate) ir->set_flag(alu_src1_neg);
 511       if (src1.abs) ir->set_flag(alu_src1_abs);
 512       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 513       emit_instruction(ir);
 514    }
 515
 516    ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
 517                            from_nir(src1, 3), (instr.dest.write_mask) & (1 << 3) ? write : empty);
 518    if (src1.negate) ir->set_flag(alu_src1_neg);
 519    if (src1.abs) ir->set_flag(alu_src1_abs);
 520    emit_instruction(ir);
 521
 522    ir->set_flag(alu_last_instr);
 523    return true;
 524
 525 }
 526
 527 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
 528 {
 529    AluInstruction *ir = nullptr;
 530    for (int i = 0; i < 4 ; ++i) {
 531       if (instr.dest.write_mask & (1 << i)) {
 532          ir = new AluInstruction(op, from_nir(instr.dest, i),
 533                                  from_nir(instr.src[0], i), Value::zero,
 534                                  write);
 535          emit_instruction(ir);
 536       }
 537    }
 538    if (ir)
 539       ir->set_flag(alu_last_instr);
 540    return true;
 541 }
 542
 543 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
 544 {
 545    AluInstruction *ir = nullptr;
 546    for (int i = 0; i < 4 ; ++i) {
 547       if (instr.dest.write_mask & (1 << i)){
 548          ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
 549                                  from_nir(instr.src[0], i), Value::one_f, write);
 550          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
 551          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
 552          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 553          emit_instruction(ir);
 554       }
 555    }
 556    if (ir)
 557       ir->set_flag(alu_last_instr);
 558    return true;
 559 }
 560
 561 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
 562 {
 563
 564    AluInstruction *ir = nullptr;
 565    PValue v[4]; // this might need some additional temp register creation
 566    for (unsigned i = 0; i < 4 ; ++i)
 567       v[i] = from_nir(instr.dest, i);
 568
 569    EAluOp combine = all ? op2_and_int : op2_or_int;
 570
 571    /* For integers we can not use the modifiers, so this needs some emulation */
 572    /* Should actually be lowered with NIR */
 573    if (instr.src[0].negate == instr.src[1].negate &&
 574        instr.src[0].abs == instr.src[1].abs) {
 575
 576       for (unsigned i = 0; i < nc ; ++i) {
 577          ir = new AluInstruction(op, v[i], from_nir(instr.src[0], i),
 578                from_nir(instr.src[1], i), write);
 579          emit_instruction(ir);
 580       }
 581       if (ir)
 582          ir->set_flag(alu_last_instr);
 583    } else {
 584       std::cerr << "Negate in iequal/inequal not (yet) supported\n";
 585       return false;
 586    }
 587
 588    for (unsigned i = 0; i < nc/2 ; ++i) {
 589       ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
 590       emit_instruction(ir);
 591    }
 592    if (ir)
 593       ir->set_flag(alu_last_instr);
 594
 595    if (nc > 2) {
 596       ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
 597       emit_instruction(ir);
 598    }
 599
 600    return true;
 601 }
 602
 603 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
 604 {
 605    AluInstruction *ir = nullptr;
 606    PValue v[4]; // this might need some additional temp register creation
 607    for (unsigned i = 0; i < 4 ; ++i)
 608       v[i] = from_nir(instr.dest, i);
 609
 610    for (unsigned i = 0; i < nc ; ++i) {
 611       ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
 612             from_nir(instr.src[1],i), write);
 613
 614       if (instr.src[0].abs)
 615          ir->set_flag(alu_src0_abs);
 616       if (instr.src[0].negate)
 617          ir->set_flag(alu_src0_neg);
 618
 619       if (instr.src[1].abs)
 620          ir->set_flag(alu_src1_abs);
 621       if (instr.src[1].negate)
 622          ir->set_flag(alu_src1_neg);
 623
 624       emit_instruction(ir);
 625    }
 626    if (ir)
 627       ir->set_flag(alu_last_instr);
 628
 629    for (unsigned i = 0; i < nc ; ++i) {
 630       ir = new AluInstruction(op1_max4, v[i], v[i], write);
 631       if (all) ir->set_flag(alu_src0_neg);
 632       emit_instruction(ir);
 633    }
 634
 635    for (unsigned i = nc; i < 4 ; ++i) {
 636       ir = new AluInstruction(op1_max4, v[i],
 637                               all ? Value::one_f : Value::zero, write);
 638       if (all)
 639          ir->set_flag(alu_src0_neg);
 640
 641       emit_instruction(ir);
 642    }
 643
 644    ir->set_flag(alu_last_instr);
 645
 646    if (all)
 647       op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
 648    else
 649       op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
 650
 651    ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
 652    if (all)
 653       ir->set_flag(alu_src1_neg);
 654    emit_instruction(ir);
 655
 656    return true;
 657 }
 658
 659 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
 660 {
 661    AluInstruction *ir = nullptr;
 662    PValue v[4]; // this might need some additional temp register creation
 663    for (unsigned i = 0; i < 4 ; ++i)
 664       v[i] = from_nir(instr.dest, i);
 665
 666    for (unsigned i = 0; i < 2 ; ++i) {
 667       ir = new AluInstruction(op, v[i], from_nir(instr.src[0],i),
 668             from_nir(instr.src[1],i), write);
 669       if (instr.src[0].abs)
 670          ir->set_flag(alu_src0_abs);
 671       if (instr.src[0].negate)
 672          ir->set_flag(alu_src0_neg);
 673
 674       if (instr.src[1].abs)
 675          ir->set_flag(alu_src1_abs);
 676       if (instr.src[1].negate)
 677          ir->set_flag(alu_src1_neg);
 678
 679       emit_instruction(ir);
 680    }
 681    if (ir)
 682       ir->set_flag(alu_last_instr);
 683
 684    op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
 685    ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
 686    emit_instruction(ir);
 687
 688    return true;
 689 }
 690
 691 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
 692 {
 693    const nir_alu_src& src0 = instr.src[0];
 694    const nir_alu_src& src1 = instr.src[1];
 695
 696    AluInstruction *ir = nullptr;
 697    for (int i = 0; i < 4 ; ++i) {
 698       if (instr.dest.write_mask & (1 << i)){
 699          ir = new AluInstruction(opcode, from_nir(instr.dest, i), from_nir(src0, i), from_nir(src1, i), last_write);
 700          if (src0.negate) ir->set_flag(alu_src0_neg);
 701          if (src0.abs) ir->set_flag(alu_src0_abs);
 702          if (src1.negate) ir->set_flag(alu_src1_neg);
 703          if (src1.abs) ir->set_flag(alu_src1_abs);
 704          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 705          emit_instruction(ir);
 706       }
 707    }
 708    return true;
 709 }
 710
 711 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
 712 {
 713
 714    const nir_alu_src& src0 = instr.src[0];
 715    const nir_alu_src& src1 = instr.src[1];
 716
 717    if (src0.negate || src1.negate ||
 718        src0.abs || src1.abs) {
 719       std::cerr << "R600: don't support modifiers with integer operations";
 720       return false;
 721    }
 722    return emit_alu_op2(instr, opcode, opts);
 723 }
 724
 725 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
 726 {
 727    const nir_alu_src *src0 = &instr.src[0];
 728    const nir_alu_src *src1 = &instr.src[1];
 729
 730    if (ops & op2_opt_reverse)
 731       std::swap(src0, src1);
 732
 733    bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
 734
 735    AluInstruction *ir = nullptr;
 736    for (int i = 0; i < 4 ; ++i) {
 737       if (instr.dest.write_mask & (1 << i)){
 738          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 739                                  from_nir(*src0, i), from_nir(*src1, i), write);
 740
 741          if (src0->negate) ir->set_flag(alu_src0_neg);
 742          if (src0->abs) ir->set_flag(alu_src0_abs);
 743          if (src1_negate) ir->set_flag(alu_src1_neg);
 744          if (src1->abs) ir->set_flag(alu_src1_abs);
 745          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 746          emit_instruction(ir);
 747       }
 748    }
 749    if (ir)
 750       ir->set_flag(alu_last_instr);
 751    return true;
 752 }
 753
 754 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
 755 {
 756    const nir_alu_src *src0 = &instr.src[0];
 757    const nir_alu_src *src1 = &instr.src[1];
 758
 759    if (ops & op2_opt_reverse)
 760       std::swap(src0, src1);
 761
 762    GPRVector::Values v0;
 763    for (int i = 0; i < 4 ; ++i)
 764       v0[i] = from_nir(*src0, i);
 765
 766    GPRVector::Values v1;
 767    for (int i = 0; i < 4 ; ++i)
 768       v1[i] = from_nir(*src1, i);
 769
 770    if (src0->abs ||   src0->negate) {
 771       int src0_tmp = allocate_temp_register();
 772       GPRVector::Values v0_temp;
 773       AluInstruction *ir = nullptr;
 774       for (int i = 0; i < 4 ; ++i) {
 775          if (instr.dest.write_mask & (1 << i)) {
 776             v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
 777             ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
 778             if (src0->abs) ir->set_flag(alu_src0_abs);
 779             if (src0->negate) ir->set_flag(alu_src0_neg);
 780             emit_instruction(ir);
 781             v0[i] = v0_temp[i];
 782          }
 783       }
 784       if (ir)
 785          ir->set_flag(alu_last_instr);
 786    }
 787
 788    if (src1->abs || src1->negate) {
 789       int src1_tmp = allocate_temp_register();
 790       GPRVector::Values v1_temp;
 791       AluInstruction *ir = nullptr;
 792       for (int i = 0; i < 4 ; ++i) {
 793          if (instr.dest.write_mask & (1 << i)) {
 794             v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
 795             ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
 796             if (src1->abs) ir->set_flag(alu_src0_abs);
 797             if (src1->negate) ir->set_flag(alu_src0_neg);
 798             emit_instruction(ir);
 799             v1[i] = v1_temp[i];
 800          }
 801       }
 802       if (ir)
 803          ir->set_flag(alu_last_instr);
 804    }
 805
 806    AluInstruction *ir = nullptr;
 807    for (int i = 0; i < 4 ; ++i) {
 808       if (instr.dest.write_mask & (1 << i)){
 809          ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
 810          emit_instruction(ir);
 811       }
 812    }
 813    if (ir)
 814       ir->set_flag(alu_last_instr);
 815    return true;
 816 }
 817
 818
 819 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
 820 {
 821    int sel_tmp = allocate_temp_register();
 822    GPRVector tmp(sel_tmp, {0,1,2,3});
 823
 824    AluInstruction *ir = nullptr;
 825    PValue help[4];
 826
 827    for (int i = 0; i < 4 ; ++i) {
 828       if (instr.dest.write_mask & (1 << i)){
 829          help[i] = from_nir(instr.dest, i);
 830          auto s = from_nir(instr.src[0], i);
 831          ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
 832          emit_instruction(ir);
 833       }
 834    }
 835    if (ir)
 836       ir->set_flag(alu_last_instr);
 837
 838    for (int i = 0; i < 4 ; ++i) {
 839       if (instr.dest.write_mask & (1 << i)){
 840          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
 841          emit_instruction(ir);
 842       }
 843    }
 844    if (ir)
 845       ir->set_flag(alu_last_instr);
 846
 847    for (int i = 0; i < 4 ; ++i) {
 848       if (instr.dest.write_mask & (1 << i)){
 849
 850          ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
 851                                  PValue(new LiteralValue(-1,0)), help[i], write);
 852          emit_instruction(ir);
 853       }
 854    }
 855    if (ir)
 856       ir->set_flag(alu_last_instr);
 857    return true;
 858 }
 859
 860 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
 861 {
 862    PValue help[4];
 863    PValue src[4];
 864    AluInstruction *ir = nullptr;
 865
 866    for (int i = 0; i < 4 ; ++i) {
 867       help[i] = from_nir(instr.dest, i);
 868       src[i] = from_nir(instr.src[0], i);
 869    }
 870
 871    if (instr.src[0].abs) {
 872
 873       for (int i = 0; i < 4 ; ++i) {
 874          if (instr.dest.write_mask & (1 << i)){
 875             ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
 876             ir->set_flag(alu_src0_abs);
 877             emit_instruction(ir);
 878          }
 879       }
 880       if (ir)
 881          ir->set_flag(alu_last_instr);
 882
 883       if (instr.src[0].negate) {
 884          for (int i = 0; i < 4 ; ++i) {
 885             if (instr.dest.write_mask & (1 << i)){
 886                ir = new AluInstruction(op1_mov, help[i], help[i], write);
 887                ir->set_flag(alu_src0_neg);
 888                emit_instruction(ir);
 889             }
 890          }
 891          if (ir)
 892             ir->set_flag(alu_last_instr);
 893       }
 894
 895       return true;
 896    }
 897
 898    for (int i = 0; i < 4 ; ++i) {
 899       if (instr.dest.write_mask & (1 << i)){
 900          ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
 901          if (instr.src[0].negate) {
 902             ir->set_flag(alu_src0_neg);
 903             ir->set_flag(alu_src2_neg);
 904          }
 905          emit_instruction(ir);
 906       }
 907    }
 908
 909    if (ir)
 910       ir->set_flag(alu_last_instr);
 911
 912    for (int i = 0; i < 4 ; ++i) {
 913       if (instr.dest.write_mask & (1 << i)){
 914          ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
 915          ir->set_flag(alu_src0_neg);
 916          ir->set_flag(alu_src1_neg);
 917          emit_instruction(ir);
 918       }
 919    }
 920    if (ir)
 921       ir->set_flag(alu_last_instr);
 922    return true;
 923 }
 924
 925 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
 926                                       std::array<uint8_t, 3> reorder)
 927 {
 928    const nir_alu_src *src[3];
 929    src[0] = &instr.src[reorder[0]];
 930    src[1] = &instr.src[reorder[1]];
 931    src[2] = &instr.src[reorder[2]];
 932
 933    AluInstruction *ir = nullptr;
 934    for (int i = 0; i < 4 ; ++i) {
 935       if (instr.dest.write_mask & (1 << i)){
 936          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
 937                                  from_nir(*src[0], i), from_nir(*src[1], i),
 938                                  from_nir(*src[2], i), write);
 939
 940          if (src[0]->negate) ir->set_flag(alu_src0_neg);
 941          if (src[1]->negate) ir->set_flag(alu_src1_neg);
 942          if (src[2]->negate) ir->set_flag(alu_src2_neg);
 943
 944          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
 945          ir->set_flag(alu_write);
 946          emit_instruction(ir);
 947       }
 948    }
 949    if (ir)
 950       ir->set_flag(alu_last_instr);
 951    return true;
 952 }
 953
 954 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
 955 {
 956    AluInstruction *ir = nullptr;
 957    for (int i = 0; i < 4 ; ++i) {
 958       if (instr.dest.write_mask & (1 << i)){
 959          ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
 960                                  from_nir(instr.src[0], i), write);
 961          emit_instruction(ir);
 962       }
 963    }
 964    if (ir)
 965       ir->set_flag(alu_last_instr);
 966
 967    return true;
 968 }
 969
/* Swizzle selector characters: indices 0-3 are 'x', 'y', 'z', 'w',
 * 4 and 5 are '0' and '1', 6 is '?' and 7 is '_'.
 * NOTE(review): presumably meant for printing swizzles in debug output;
 * none of the functions that follow this definition reference it -
 * confirm whether it is still needed. */
static const char swz[] = "xyzw01?_";
 971
 972
 973
 974 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
 975 {
 976    int sel_tmp = allocate_temp_register();
 977    GPRVector tmp(sel_tmp, {0,1,2,3});
 978
 979    std::array<PValue,4> src;
 980    AluInstruction *ir = nullptr;
 981    for (int i = 0; i < 4 ; ++i) {
 982       if (instr.dest.write_mask & (1 << i)){
 983          src[i] = from_nir(instr.src[0],i);
 984          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, src[i], write);
 985          emit_instruction(ir);
 986       }
 987    }
 988    if (ir)
 989       ir->set_flag(alu_last_instr);
 990
 991    for (int i = 0; i < 4 ; ++i) {
 992       if (instr.dest.write_mask & (1 << i)){
 993          ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), src[i],
 994                                  src[i], tmp.reg_i(i), write);
 995          emit_instruction(ir);
 996       }
 997    }
 998    if (ir)
 999       ir->set_flag(alu_last_instr);
1000    return true;
1001 }
1002
1003 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1004 {
1005
1006    int sel_tmp = allocate_temp_register();
1007    int sel_tmp0 = allocate_temp_register();
1008    int sel_tmp1 = allocate_temp_register();
1009
1010    PValue asrc1(new GPRValue(sel_tmp, 0));
1011    PValue asrc2(new GPRValue(sel_tmp, 1));
1012    PValue rsign(new GPRValue(sel_tmp, 2));
1013    PValue err(new GPRValue(sel_tmp, 3));
1014
1015    GPRVector tmp0(sel_tmp0, {0,1,2,3});
1016    GPRVector tmp1(sel_tmp1, {0,1,2,3});
1017
1018    std::array<PValue, 4> src0;
1019    std::array<PValue, 4> src1;
1020
1021    for (int i = 0; i < 4 ; ++i) {
1022       if (instr.dest.write_mask & (1 << i)) {
1023          src0[i] = from_nir(instr.src[0], i);
1024          src1[i] = from_nir(instr.src[1], i);
1025       }
1026    }
1027
1028
1029    for (int i = 3; i >= 0 ; --i) {
1030       if (!(instr.dest.write_mask & (1 << i)))
1031          continue;
1032       if (use_signed) {
1033          emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1034          emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1035          emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1036
1037
1038          emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1039          emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1040       } else {
1041          asrc1 = src0[i];
1042          asrc2 = src1[i];
1043       }
1044
1045       emit_instruction(op1_recip_uint,  tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1046
1047       emit_instruction(op2_mullo_uint,  tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1048
1049       emit_instruction(op2_sub_int,  tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1050       emit_instruction(op2_mulhi_uint,  tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1051
1052       emit_instruction(op3_cnde_int,  tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1053
1054       emit_instruction(op2_mulhi_uint,  err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1055
1056       emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1057       emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1058
1059       emit_instruction(op3_cnde_int,  tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1060
1061       emit_instruction(op2_mulhi_uint,  tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1062       emit_instruction(op2_mullo_uint,  tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1063
1064       emit_instruction(op2_sub_int,  tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1065
1066
1067       emit_instruction(op2_setge_uint,  tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1068       emit_instruction(op2_setge_uint,  tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1069
1070       if (mod) {
1071          emit_instruction(op2_sub_int,  tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1072          emit_instruction(op2_add_int,  tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1073       } else {
1074          emit_instruction(op2_add_int,  tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1075          emit_instruction(op2_sub_int,  tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1076       }
1077
1078       emit_instruction(op2_and_int,  tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1079
1080       if (mod)
1081          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1082       else
1083          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1084
1085       if (use_signed) {
1086          emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1087          emit_instruction(op2_sub_int,  tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1088
1089          if (mod)
1090             emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1091                           {alu_write, alu_last_instr});
1092          else
1093             emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1094                           {alu_write, alu_last_instr});
1095       } else {
1096          emit_instruction(op3_cnde_int,  from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1097       }
1098    }
1099    return true;
1100 }
1101
1102 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, GPRVector::Values& s,
1103                                              GPRVector::Values& v, int ncomp)
1104 {
1105
1106    AluInstruction *alu = nullptr;
1107    for (int i = 0; i < ncomp; ++i) {
1108       alu  = new AluInstruction(op1_mov,  v[i], s[i], {alu_write});
1109       if (src.abs)
1110          alu->set_flag(alu_src0_abs);
1111       if (src.negate)
1112          alu->set_flag(alu_src0_neg);
1113       emit_instruction(alu);
1114    }
1115    make_last(alu);
1116 }
1117
1118 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1119                                       bool fine)
1120 {
1121
1122    GPRVector::Values v;
1123    GPRVector::Values s;
1124    GPRVector::Values *source = &s;
1125    std::array<int, 4> writemask = {0,1,2,3};
1126
1127    int ncomp = instr.src[0].src.is_ssa ? instr.src[0].src.ssa->num_components :
1128                instr.src[0].src.reg.reg->num_components;
1129
1130    for (int i = 0; i < 4; ++i) {
1131       writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1132       v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1133       s[i] = from_nir(instr.src[0], (i < ncomp) ? i : 0);
1134    }
1135
1136    if (instr.src[0].abs || instr.src[0].negate) {
1137       split_alu_modifiers(instr.src[0], s, v, ncomp);
1138       source = &v;
1139    }
1140
1141    /* This is querying the dreivatives of the output fb, so we would either need
1142     * access to the neighboring pixels or to the framebuffer. Neither is currently
1143     * implemented */
1144    GPRVector dst(v);
1145    GPRVector src(*source);
1146
1147    auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1148    tex->set_dest_swizzle(writemask);
1149
1150    if (fine) {
1151       std::cerr << "Sewt fine flag\n";
1152       tex->set_flag(TexInstruction::grad_fine);
1153    }
1154
1155    emit_instruction(tex);
1156
1157    return true;
1158 }
1159
1160 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1161 {
1162    int itmp = allocate_temp_register();
1163    std::array<PValue, 4> tmp;
1164    std::array<PValue, 4> dst;
1165    std::array<PValue, 4> src0;
1166    std::array<PValue, 4> shift;
1167
1168    PValue l32(new LiteralValue(32));
1169    unsigned write_mask = instr.dest.write_mask;
1170
1171    AluInstruction *ir = nullptr;
1172    for (int i = 0; i < 4; i++) {
1173       if (!(write_mask & (1<<i)))
1174                         continue;
1175       dst[i] = from_nir(instr.dest, i);
1176       src0[i] = from_nir(instr.src[0], i);
1177       shift[i] = from_nir(instr.src[2], i);
1178
1179       ir = new AluInstruction(opcode, dst[i],
1180                               {src0[i], from_nir(instr.src[1], i), shift[i]},
1181                               {alu_write});
1182       emit_instruction(ir);
1183    }
1184    make_last(ir);
1185
1186    for (int i = 0; i < 4; i++) {
1187       if (!(write_mask & (1<<i)))
1188                         continue;
1189       tmp[i] = PValue(new GPRValue(itmp, i));
1190       ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1191       {alu_write});
1192       emit_instruction(ir);
1193    }
1194    make_last(ir);
1195
1196    for (int i = 0; i < 4; i++) {
1197       if (!(write_mask & (1<<i)))
1198                         continue;
1199       ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1200                               {alu_write});
1201       emit_instruction(ir);
1202    }
1203    make_last(ir);
1204
1205    return true;
1206 }
1207
1208 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1209 {
1210    auto t0 = get_temp_vec4();
1211    auto t1 = get_temp_vec4();
1212    auto t2 = get_temp_vec4();
1213
1214    PValue l32(new LiteralValue(32));
1215    unsigned write_mask = instr.dest.write_mask;
1216    if (!write_mask) return true;
1217
1218    AluInstruction *ir = nullptr;
1219    for (int i = 0; i < 4; i++) {
1220       if (!(write_mask & (1<<i)))
1221                         continue;
1222
1223       ir = new AluInstruction(op2_setge_int, t0[i], {from_nir(instr.src[3], i), l32}, {alu_write});
1224       emit_instruction(ir);
1225    }
1226    make_last(ir);
1227
1228    for (int i = 0; i < 4; i++) {
1229       if (!(write_mask & (1<<i)))
1230                         continue;
1231       ir = new AluInstruction(op2_bfm_int, t1[i], {from_nir(instr.src[3], i),
1232                                                    from_nir(instr.src[2], i)}, {alu_write});
1233       emit_instruction(ir);
1234    }
1235    ir->set_flag(alu_last_instr);
1236
1237    for (int i = 0; i < 4; i++) {
1238       if (!(write_mask & (1<<i)))
1239                         continue;
1240       ir = new AluInstruction(op2_lshl_int, t2[i], {from_nir(instr.src[1], i),
1241                                                     from_nir(instr.src[2], i)}, {alu_write});
1242       emit_instruction(ir);
1243    }
1244    ir->set_flag(alu_last_instr);
1245
1246
1247    for (int i = 0; i < 4; i++) {
1248       if (!(write_mask & (1<<i)))
1249                         continue;
1250       ir = new AluInstruction(op3_bfi_int, from_nir(instr.dest, i),
1251                   {t1[i], t2[i], from_nir(instr.src[0], i)}, {alu_write});
1252       emit_instruction(ir);
1253    }
1254    ir->set_flag(alu_last_instr);
1255
1256    for (int i = 0; i < 4; i++) {
1257       if (!(write_mask & (1<<i)))
1258                         continue;
1259       ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1260                              {t0[i], from_nir(instr.dest, i),
1261                                      from_nir(instr.src[1], i)}, {alu_write});
1262       emit_instruction(ir);
1263    }
1264    ir->set_flag(alu_last_instr);
1265
1266    return true;
1267 }
1268
1269 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1270 {
1271    emit_instruction(op2_lshr_int, from_nir(instr.dest, 0),
1272    {from_nir(instr.src[0], 0), PValue(new LiteralValue(16))},
1273    {alu_write, alu_last_instr});
1274
1275    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1276    {from_nir(instr.dest, 0)},{alu_write, alu_last_instr});
1277
1278    return true;
1279 }
1280
1281 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1282 {
1283    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1284    {from_nir(instr.src[0], 0)},{alu_write, alu_last_instr});
1285    return true;
1286 }
1287
1288 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1289 {
1290    int it0 = allocate_temp_register();
1291    PValue x(new GPRValue(it0, 0));
1292    PValue y(new GPRValue(it0, 1));
1293
1294    emit_instruction(op1_flt32_to_flt16, x,{from_nir(instr.src[0], 0)},{alu_write});
1295    emit_instruction(op1_flt32_to_flt16, y,{from_nir(instr.src[1], 0)},{alu_write, alu_last_instr});
1296
1297    emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1298
1299    emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1300
1301    return true;
1302 }
1303
1304 }