src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_nir.h"
  25 #include "brw_vec4.h"
  26 #include "glsl/ir_uniform.h"
  27
  28 namespace brw {
  29
  30 void
  31 vec4_visitor::emit_nir_code()
  32 {
  33    nir_shader *nir = prog->nir;
  34
  35    if (nir->num_inputs > 0)
  36       nir_setup_inputs(nir);
  37
  38    if (nir->num_uniforms > 0)
  39       nir_setup_uniforms(nir);
  40
  41    nir_setup_system_values(nir);
  42
  43    /* get the main function and emit it */
  44    nir_foreach_overload(nir, overload) {
  45       assert(strcmp(overload->function->name, "main") == 0);
  46       assert(overload->impl);
  47       nir_emit_impl(overload->impl);
  48    }
  49 }
  50
  51 void
  52 vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
  53 {
  54    dst_reg *reg;
  55
  56    switch (instr->intrinsic) {
  57    case nir_intrinsic_load_vertex_id:
  58       unreachable("should be lowered by lower_vertex_id().");
  59
  60    case nir_intrinsic_load_vertex_id_zero_base:
  61       reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
  62       if (reg->file == BAD_FILE)
  63          *reg =
  64             *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
  65                                              glsl_type::int_type);
  66       break;
  67
  68    case nir_intrinsic_load_base_vertex:
  69       reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
  70       if (reg->file == BAD_FILE)
  71          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
  72                                                  glsl_type::int_type);
  73       break;
  74
  75    case nir_intrinsic_load_instance_id:
  76       reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
  77       if (reg->file == BAD_FILE)
  78          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
  79                                                  glsl_type::int_type);
  80       break;
  81
  82    default:
  83       break;
  84    }
  85 }
  86
  87 static bool
  88 setup_system_values_block(nir_block *block, void *void_visitor)
  89 {
  90    vec4_visitor *v = (vec4_visitor *)void_visitor;
  91
  92    nir_foreach_instr(block, instr) {
  93       if (instr->type != nir_instr_type_intrinsic)
  94          continue;
  95
  96       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  97       v->nir_setup_system_value_intrinsic(intrin);
  98    }
  99
 100    return true;
 101 }
 102
 103 void
 104 vec4_visitor::nir_setup_system_values(nir_shader *shader)
 105 {
 106    nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
 107
 108    nir_foreach_overload(shader, overload) {
 109       assert(strcmp(overload->function->name, "main") == 0);
 110       assert(overload->impl);
 111       nir_foreach_block(overload->impl, setup_system_values_block, this);
 112    }
 113 }
 114
 115 void
 116 vec4_visitor::nir_setup_inputs(nir_shader *shader)
 117 {
 118    nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
 119
 120    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 121       int offset = var->data.driver_location;
 122       unsigned size = type_size(var->type);
 123       for (unsigned i = 0; i < size; i++) {
 124          src_reg src = src_reg(ATTR, var->data.location + i, var->type);
 125          nir_inputs[offset + i] = src;
 126       }
 127    }
 128 }
 129
 130 void
 131 vec4_visitor::nir_setup_uniforms(nir_shader *shader)
 132 {
 133    uniforms = 0;
 134
 135    nir_uniform_driver_location =
 136       rzalloc_array(mem_ctx, unsigned, this->uniform_array_size);
 137
 138    if (shader_prog) {
 139       foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
 140          /* UBO's, atomics and samplers don't take up space in the
 141             uniform file */
 142          if (var->interface_type != NULL || var->type->contains_atomic() ||
 143              type_size(var->type) == 0) {
 144             continue;
 145          }
 146
 147          assert(uniforms < uniform_array_size);
 148          this->uniform_size[uniforms] = type_size(var->type);
 149
 150          if (strncmp(var->name, "gl_", 3) == 0)
 151             nir_setup_builtin_uniform(var);
 152          else
 153             nir_setup_uniform(var);
 154       }
 155    } else {
 156       /* ARB_vertex_program is not supported yet */
 157       assert("Not implemented");
 158    }
 159 }
 160
 161 void
 162 vec4_visitor::nir_setup_uniform(nir_variable *var)
 163 {
 164    int namelen = strlen(var->name);
 165
 166    /* The data for our (non-builtin) uniforms is stored in a series of
 167     * gl_uniform_driver_storage structs for each subcomponent that
 168     * glGetUniformLocation() could name.  We know it's been set up in the same
 169     * order we'd walk the type, so walk the list of storage and find anything
 170     * with our name, or the prefix of a component that starts with our name.
 171     */
 172     for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
 173        struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 174
 175        if (storage->builtin)
 176           continue;
 177
 178        if (strncmp(var->name, storage->name, namelen) != 0 ||
 179            (storage->name[namelen] != 0 &&
 180             storage->name[namelen] != '.' &&
 181             storage->name[namelen] != '[')) {
 182           continue;
 183        }
 184
 185        gl_constant_value *components = storage->storage;
 186        unsigned vector_count = (MAX2(storage->array_elements, 1) *
 187                                 storage->type->matrix_columns);
 188
 189        for (unsigned s = 0; s < vector_count; s++) {
 190           assert(uniforms < uniform_array_size);
 191           uniform_vector_size[uniforms] = storage->type->vector_elements;
 192
 193           int i;
 194           for (i = 0; i < uniform_vector_size[uniforms]; i++) {
 195              stage_prog_data->param[uniforms * 4 + i] = components;
 196              components++;
 197           }
 198           for (; i < 4; i++) {
 199              static const gl_constant_value zero = { 0.0 };
 200              stage_prog_data->param[uniforms * 4 + i] = &zero;
 201           }
 202
 203           nir_uniform_driver_location[uniforms] = var->data.driver_location;
 204           uniforms++;
 205        }
 206     }
 207 }
 208
 209 void
 210 vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
 211 {
 212    const nir_state_slot *const slots = var->state_slots;
 213    assert(var->state_slots != NULL);
 214
 215    for (unsigned int i = 0; i < var->num_state_slots; i++) {
 216       /* This state reference has already been setup by ir_to_mesa,
 217        * but we'll get the same index back here.  We can reference
 218        * ParameterValues directly, since unlike brw_fs.cpp, we never
 219        * add new state references during compile.
 220        */
 221       int index = _mesa_add_state_reference(this->prog->Parameters,
 222                                             (gl_state_index *)slots[i].tokens);
 223       gl_constant_value *values =
 224          &this->prog->Parameters->ParameterValues[index][0];
 225
 226       assert(uniforms < uniform_array_size);
 227
 228       for (unsigned j = 0; j < 4; j++)
 229          stage_prog_data->param[uniforms * 4 + j] =
 230             &values[GET_SWZ(slots[i].swizzle, j)];
 231
 232       this->uniform_vector_size[uniforms] =
 233          (var->type->is_scalar() || var->type->is_vector() ||
 234           var->type->is_matrix() ? var->type->vector_elements : 4);
 235
 236       nir_uniform_driver_location[uniforms] = var->data.driver_location;
 237       uniforms++;
 238    }
 239 }
 240
 241 void
 242 vec4_visitor::nir_emit_impl(nir_function_impl *impl)
 243 {
 244    nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
 245
 246    foreach_list_typed(nir_register, reg, node, &impl->registers) {
 247       unsigned array_elems =
 248          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
 249
 250       nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems));
 251    }
 252
 253    nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
 254
 255    nir_emit_cf_list(&impl->body);
 256 }
 257
 258 void
 259 vec4_visitor::nir_emit_cf_list(exec_list *list)
 260 {
 261    exec_list_validate(list);
 262    foreach_list_typed(nir_cf_node, node, node, list) {
 263       switch (node->type) {
 264       case nir_cf_node_if:
 265          nir_emit_if(nir_cf_node_as_if(node));
 266          break;
 267
 268       case nir_cf_node_loop:
 269          nir_emit_loop(nir_cf_node_as_loop(node));
 270          break;
 271
 272       case nir_cf_node_block:
 273          nir_emit_block(nir_cf_node_as_block(node));
 274          break;
 275
 276       default:
 277          unreachable("Invalid CFG node block");
 278       }
 279    }
 280 }
 281
 282 void
 283 vec4_visitor::nir_emit_if(nir_if *if_stmt)
 284 {
 285    /* First, put the condition in f0 */
 286    src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
 287    vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
 288    inst->conditional_mod = BRW_CONDITIONAL_NZ;
 289
 290    emit(IF(BRW_PREDICATE_NORMAL));
 291
 292    nir_emit_cf_list(&if_stmt->then_list);
 293
 294    /* note: if the else is empty, dead CF elimination will remove it */
 295    emit(BRW_OPCODE_ELSE);
 296
 297    nir_emit_cf_list(&if_stmt->else_list);
 298
 299    emit(BRW_OPCODE_ENDIF);
 300 }
 301
 302 void
 303 vec4_visitor::nir_emit_loop(nir_loop *loop)
 304 {
 305    emit(BRW_OPCODE_DO);
 306
 307    nir_emit_cf_list(&loop->body);
 308
 309    emit(BRW_OPCODE_WHILE);
 310 }
 311
 312 void
 313 vec4_visitor::nir_emit_block(nir_block *block)
 314 {
 315    nir_foreach_instr(block, instr) {
 316       nir_emit_instr(instr);
 317    }
 318 }
 319
 320 void
 321 vec4_visitor::nir_emit_instr(nir_instr *instr)
 322 {
 323    this->base_ir = instr;
 324
 325    switch (instr->type) {
 326    case nir_instr_type_load_const:
 327       nir_emit_load_const(nir_instr_as_load_const(instr));
 328       break;
 329
 330    case nir_instr_type_intrinsic:
 331       nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
 332       break;
 333
 334    case nir_instr_type_alu:
 335       nir_emit_alu(nir_instr_as_alu(instr));
 336       break;
 337
 338    case nir_instr_type_jump:
 339       nir_emit_jump(nir_instr_as_jump(instr));
 340       break;
 341
 342    case nir_instr_type_tex:
 343       nir_emit_texture(nir_instr_as_tex(instr));
 344       break;
 345
 346    default:
 347       fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
 348       break;
 349    }
 350 }
 351
 352 static dst_reg
 353 dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 354                     unsigned base_offset, nir_src *indirect)
 355 {
 356    dst_reg reg;
 357
 358    reg = v->nir_locals[nir_reg->index];
 359    reg = offset(reg, base_offset);
 360    if (indirect) {
 361       reg.reladdr =
 362          new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
 363                                                 BRW_REGISTER_TYPE_D,
 364                                                 1));
 365    }
 366    return reg;
 367 }
 368
 369 dst_reg
 370 vec4_visitor::get_nir_dest(nir_dest dest)
 371 {
 372    assert(!dest.is_ssa);
 373    return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
 374                               dest.reg.indirect);
 375 }
 376
 377 dst_reg
 378 vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
 379 {
 380    return retype(get_nir_dest(dest), type);
 381 }
 382
 383 dst_reg
 384 vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
 385 {
 386    return get_nir_dest(dest, brw_type_for_nir_type(type));
 387 }
 388
 389 src_reg
 390 vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
 391                           unsigned num_components)
 392 {
 393    dst_reg reg;
 394
 395    if (src.is_ssa) {
 396       assert(src.ssa != NULL);
 397       reg = nir_ssa_values[src.ssa->index];
 398    }
 399    else {
 400      reg = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
 401                                src.reg.indirect);
 402    }
 403
 404    reg = retype(reg, type);
 405
 406    src_reg reg_as_src = src_reg(reg);
 407    reg_as_src.swizzle = brw_swizzle_for_size(num_components);
 408    return reg_as_src;
 409 }
 410
 411 src_reg
 412 vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
 413                           unsigned num_components)
 414 {
 415    return get_nir_src(src, brw_type_for_nir_type(type), num_components);
 416 }
 417
 418 src_reg
 419 vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
 420 {
 421    /* if type is not specified, default to signed int */
 422    return get_nir_src(src, nir_type_int, num_components);
 423 }
 424
 425 void
 426 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 427 {
 428    dst_reg reg = dst_reg(GRF, alloc.allocate(1));
 429    reg.type =  BRW_REGISTER_TYPE_F;
 430
 431    /* @FIXME: consider emitting vector operations to save some MOVs in
 432     * cases where the components are representable in 8 bits.
 433     * By now, we emit a MOV for each component.
 434     */
 435    for (unsigned i = 0; i < instr->def.num_components; ++i) {
 436       reg.writemask = 1 << i;
 437       emit(MOV(reg, src_reg(instr->value.f[i])));
 438    }
 439
 440    /* Set final writemask */
 441    reg.writemask = brw_writemask_for_size(instr->def.num_components);
 442
 443    nir_ssa_values[instr->def.index] = reg;
 444 }
 445
 446 void
 447 vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 448 {
 449    dst_reg dest;
 450    src_reg src;
 451
 452    bool has_indirect = false;
 453
 454    switch (instr->intrinsic) {
 455
 456    case nir_intrinsic_load_input_indirect:
 457       has_indirect = true;
 458       /* fallthrough */
 459    case nir_intrinsic_load_input: {
 460       int offset = instr->const_index[0];
 461       src = nir_inputs[offset];
 462
 463       if (has_indirect) {
 464          dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0],
 465                                                          BRW_REGISTER_TYPE_D,
 466                                                          1));
 467       }
 468       dest = get_nir_dest(instr->dest, src.type);
 469       dest.writemask = brw_writemask_for_size(instr->num_components);
 470
 471       emit(MOV(dest, src));
 472       break;
 473    }
 474
 475    case nir_intrinsic_store_output_indirect:
 476       has_indirect = true;
 477       /* fallthrough */
 478    case nir_intrinsic_store_output: {
 479       int varying = instr->const_index[0];
 480
 481       src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
 482                         instr->num_components);
 483       dest = dst_reg(src);
 484
 485       if (has_indirect) {
 486          dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1],
 487                                                          BRW_REGISTER_TYPE_D,
 488                                                          1));
 489       }
 490       output_reg[varying] = dest;
 491       break;
 492    }
 493
 494    case nir_intrinsic_load_vertex_id:
 495       unreachable("should be lowered by lower_vertex_id()");
 496
 497    case nir_intrinsic_load_vertex_id_zero_base: {
 498       src_reg vertex_id =
 499          src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
 500       assert(vertex_id.file != BAD_FILE);
 501       dest = get_nir_dest(instr->dest, vertex_id.type);
 502       emit(MOV(dest, vertex_id));
 503       break;
 504    }
 505
 506    case nir_intrinsic_load_base_vertex: {
 507       src_reg base_vertex =
 508          src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
 509       assert(base_vertex.file != BAD_FILE);
 510       dest = get_nir_dest(instr->dest, base_vertex.type);
 511       emit(MOV(dest, base_vertex));
 512       break;
 513    }
 514
 515    case nir_intrinsic_load_instance_id: {
 516       src_reg instance_id =
 517          src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
 518       assert(instance_id.file != BAD_FILE);
 519       dest = get_nir_dest(instr->dest, instance_id.type);
 520       emit(MOV(dest, instance_id));
 521       break;
 522    }
 523
 524    case nir_intrinsic_load_uniform_indirect:
 525       has_indirect = true;
 526       /* fallthrough */
 527    case nir_intrinsic_load_uniform: {
 528       int uniform = instr->const_index[0];
 529
 530       dest = get_nir_dest(instr->dest);
 531
 532       if (has_indirect) {
 533          /* Split addressing into uniform and offset */
 534          int offset = uniform - nir_uniform_driver_location[uniform];
 535          assert(offset >= 0);
 536
 537          uniform -= offset;
 538          assert(uniform >= 0);
 539
 540          src = src_reg(dst_reg(UNIFORM, uniform));
 541          src.reg_offset = offset;
 542          src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
 543          src.reladdr = new(mem_ctx) src_reg(tmp);
 544       } else {
 545          src = src_reg(dst_reg(UNIFORM, uniform));
 546       }
 547
 548       emit(MOV(dest, src));
 549       break;
 550    }
 551
 552    case nir_intrinsic_atomic_counter_read:
 553    case nir_intrinsic_atomic_counter_inc:
 554    case nir_intrinsic_atomic_counter_dec: {
 555       unsigned surf_index = prog_data->base.binding_table.abo_start +
 556          (unsigned) instr->const_index[0];
 557       src_reg offset = get_nir_src(instr->src[0], nir_type_int,
 558                                    instr->num_components);
 559       dest = get_nir_dest(instr->dest);
 560
 561       switch (instr->intrinsic) {
 562          case nir_intrinsic_atomic_counter_inc:
 563             emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
 564                                 src_reg(), src_reg());
 565             break;
 566          case nir_intrinsic_atomic_counter_dec:
 567             emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
 568                                 src_reg(), src_reg());
 569             break;
 570          case nir_intrinsic_atomic_counter_read:
 571             emit_untyped_surface_read(surf_index, dest, offset);
 572             break;
 573          default:
 574             unreachable("Unreachable");
 575       }
 576
 577       brw_mark_surface_used(stage_prog_data, surf_index);
 578       break;
 579    }
 580
 581    case nir_intrinsic_load_ubo_indirect:
 582       has_indirect = true;
 583       /* fallthrough */
 584    case nir_intrinsic_load_ubo: {
 585       nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
 586       src_reg surf_index;
 587
 588       dest = get_nir_dest(instr->dest);
 589
 590       if (const_block_index) {
 591          /* The block index is a constant, so just emit the binding table entry
 592           * as an immediate.
 593           */
 594          surf_index = src_reg(prog_data->base.binding_table.ubo_start +
 595                               const_block_index->u[0]);
 596       } else {
 597          /* The block index is not a constant. Evaluate the index expression
 598           * per-channel and add the base UBO index; we have to select a value
 599           * from any live channel.
 600           */
 601          surf_index = src_reg(this, glsl_type::uint_type);
 602          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
 603                                                    instr->num_components),
 604                   src_reg(prog_data->base.binding_table.ubo_start)));
 605          surf_index = emit_uniformize(surf_index);
 606
 607          /* Assume this may touch any UBO. It would be nice to provide
 608           * a tighter bound, but the array information is already lowered away.
 609           */
 610          brw_mark_surface_used(&prog_data->base,
 611                                prog_data->base.binding_table.ubo_start +
 612                                shader_prog->NumUniformBlocks - 1);
 613       }
 614
 615       unsigned const_offset = instr->const_index[0];
 616       src_reg offset;
 617
 618       if (!has_indirect)  {
 619          offset = src_reg(const_offset / 16);
 620       } else {
 621          offset = src_reg(this, glsl_type::uint_type);
 622          emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
 623                   src_reg(4u)));
 624       }
 625
 626       src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
 627       packed_consts.type = dest.type;
 628
 629       emit_pull_constant_load_reg(dst_reg(packed_consts),
 630                                   surf_index,
 631                                   offset,
 632                                   NULL, NULL /* before_block/inst */);
 633
 634       packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
 635       packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
 636                                             const_offset % 16 / 4,
 637                                             const_offset % 16 / 4,
 638                                             const_offset % 16 / 4);
 639
 640       emit(MOV(dest, packed_consts));
 641       break;
 642    }
 643
 644    default:
 645       unreachable("Unknown intrinsic");
 646    }
 647 }
 648
 649 static unsigned
 650 brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
 651 {
 652    return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
 653 }
 654
 655 void
 656 vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 657 {
 658    vec4_instruction *inst;
 659
 660    dst_reg dst = get_nir_dest(instr->dest.dest,
 661                               nir_op_infos[instr->op].output_type);
 662    dst.writemask = instr->dest.write_mask;
 663
 664    src_reg op[4];
 665    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
 666       op[i] = get_nir_src(instr->src[i].src,
 667                           nir_op_infos[instr->op].input_types[i], 4);
 668       op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle);
 669       op[i].abs = instr->src[i].abs;
 670       op[i].negate = instr->src[i].negate;
 671    }
 672
 673    switch (instr->op) {
 674    case nir_op_imov:
 675    case nir_op_fmov:
 676       inst = emit(MOV(dst, op[0]));
 677       inst->saturate = instr->dest.saturate;
 678       break;
 679
 680    case nir_op_vec2:
 681    case nir_op_vec3:
 682    case nir_op_vec4:
 683       unreachable("not reached: should be handled by lower_vec_to_movs()");
 684
 685    case nir_op_i2f:
 686    case nir_op_u2f:
 687       inst = emit(MOV(dst, op[0]));
 688       inst->saturate = instr->dest.saturate;
 689       break;
 690
 691    case nir_op_f2i:
 692    case nir_op_f2u:
 693       inst = emit(MOV(dst, op[0]));
 694       break;
 695
 696    case nir_op_fadd:
 697       /* fall through */
 698    case nir_op_iadd:
 699       inst = emit(ADD(dst, op[0], op[1]));
 700       inst->saturate = instr->dest.saturate;
 701       break;
 702
 703    case nir_op_fmul:
 704       inst = emit(MUL(dst, op[0], op[1]));
 705       inst->saturate = instr->dest.saturate;
 706       break;
 707
 708    case nir_op_imul: {
 709       nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
 710       nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
 711
 712       /* For integer multiplication, the MUL uses the low 16 bits of one of
 713        * the operands (src0 through SNB, src1 on IVB and later). The MACH
 714        * accumulates in the contribution of the upper 16 bits of that
 715        * operand. If we can determine that one of the args is in the low
 716        * 16 bits, though, we can just emit a single MUL.
 717        */
 718       if (value0 && value0->u[0] < (1 << 16)) {
 719          if (devinfo->gen < 7)
 720             emit(MUL(dst, op[0], op[1]));
 721          else
 722             emit(MUL(dst, op[1], op[0]));
 723       } else if (value1 && value1->u[0] < (1 << 16)) {
 724          if (devinfo->gen < 7)
 725             emit(MUL(dst, op[1], op[0]));
 726          else
 727             emit(MUL(dst, op[0], op[1]));
 728       } else {
 729          struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 730
 731          emit(MUL(acc, op[0], op[1]));
 732          emit(MACH(dst_null_d(), op[0], op[1]));
 733          emit(MOV(dst, src_reg(acc)));
 734       }
 735       break;
 736    }
 737
 738    case nir_op_imul_high:
 739    case nir_op_umul_high: {
 740       struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 741
 742       emit(MUL(acc, op[0], op[1]));
 743       emit(MACH(dst, op[0], op[1]));
 744       break;
 745    }
 746
 747    case nir_op_frcp:
 748       inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]);
 749       inst->saturate = instr->dest.saturate;
 750       break;
 751
 752    case nir_op_fexp2:
 753       inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]);
 754       inst->saturate = instr->dest.saturate;
 755       break;
 756
 757    case nir_op_flog2:
 758       inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]);
 759       inst->saturate = instr->dest.saturate;
 760       break;
 761
 762    case nir_op_fsin:
 763       inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
 764       inst->saturate = instr->dest.saturate;
 765       break;
 766
 767    case nir_op_fcos:
 768       inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
 769       inst->saturate = instr->dest.saturate;
 770       break;
 771
 772    case nir_op_idiv:
 773    case nir_op_udiv:
 774       emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]);
 775       break;
 776
 777    case nir_op_umod:
 778       emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
 779       break;
 780
 781    case nir_op_ldexp:
 782       unreachable("not reached: should be handled by ldexp_to_arith()");
 783
 784    case nir_op_fsqrt:
 785       inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]);
 786       inst->saturate = instr->dest.saturate;
 787       break;
 788
 789    case nir_op_frsq:
 790       inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]);
 791       inst->saturate = instr->dest.saturate;
 792       break;
 793
 794    case nir_op_fpow:
 795       inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]);
 796       inst->saturate = instr->dest.saturate;
 797       break;
 798
 799    case nir_op_uadd_carry: {
 800       struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 801
 802       emit(ADDC(dst_null_ud(), op[0], op[1]));
 803       emit(MOV(dst, src_reg(acc)));
 804       break;
 805    }
 806
 807    case nir_op_usub_borrow: {
 808       struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 809
 810       emit(SUBB(dst_null_ud(), op[0], op[1]));
 811       emit(MOV(dst, src_reg(acc)));
 812       break;
 813    }
 814
 815    case nir_op_ftrunc:
 816       inst = emit(RNDZ(dst, op[0]));
 817       inst->saturate = instr->dest.saturate;
 818       break;
 819
 820    case nir_op_fceil: {
 821       src_reg tmp = src_reg(this, glsl_type::float_type);
 822       tmp.swizzle =
 823          brw_swizzle_for_size(instr->src[0].src.is_ssa ?
 824                               instr->src[0].src.ssa->num_components :
 825                               instr->src[0].src.reg.reg->num_components);
 826
 827       op[0].negate = !op[0].negate;
 828       emit(RNDD(dst_reg(tmp), op[0]));
 829       tmp.negate = true;
 830       inst = emit(MOV(dst, tmp));
 831       inst->saturate = instr->dest.saturate;
 832       break;
 833    }
 834
 835    case nir_op_ffloor:
 836       inst = emit(RNDD(dst, op[0]));
 837       inst->saturate = instr->dest.saturate;
 838       break;
 839
 840    case nir_op_ffract:
 841       inst = emit(FRC(dst, op[0]));
 842       inst->saturate = instr->dest.saturate;
 843       break;
 844
 845    case nir_op_fround_even:
 846       inst = emit(RNDE(dst, op[0]));
 847       inst->saturate = instr->dest.saturate;
 848       break;
 849
 850    case nir_op_fmin:
 851    case nir_op_imin:
 852    case nir_op_umin:
 853       inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
 854       inst->saturate = instr->dest.saturate;
 855       break;
 856
 857    case nir_op_fmax:
 858    case nir_op_imax:
 859    case nir_op_umax:
 860       inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
 861       inst->saturate = instr->dest.saturate;
 862       break;
 863
 864    default:
 865       unreachable("Unimplemented ALU operation");
 866    }
 867 }
 868
 869 void
 870 vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
 871 {
 872    /* @TODO: Not yet implemented */
 873 }
 874
 875 void
 876 vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 877 {
 878    /* @TODO: Not yet implemented */
 879 }
 880
 881 }