src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_nir.h"
  25 #include "brw_vec4.h"
  26 #include "glsl/ir_uniform.h"
  27
  28 namespace brw {
  29
  30 void
  31 vec4_visitor::emit_nir_code()
  32 {
  33    nir_shader *nir = prog->nir;
  34
  35    if (nir->num_inputs > 0)
  36       nir_setup_inputs(nir);
  37
  38    if (nir->num_uniforms > 0)
  39       nir_setup_uniforms(nir);
  40
  41    nir_setup_system_values(nir);
  42
  43    /* get the main function and emit it */
  44    nir_foreach_overload(nir, overload) {
  45       assert(strcmp(overload->function->name, "main") == 0);
  46       assert(overload->impl);
  47       nir_emit_impl(overload->impl);
  48    }
  49 }
  50
  51 void
  52 vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
  53 {
  54    dst_reg *reg;
  55
  56    switch (instr->intrinsic) {
  57    case nir_intrinsic_load_vertex_id:
  58       unreachable("should be lowered by lower_vertex_id().");
  59
  60    case nir_intrinsic_load_vertex_id_zero_base:
  61       reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
  62       if (reg->file == BAD_FILE)
  63          *reg =
  64             *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
  65                                              glsl_type::int_type);
  66       break;
  67
  68    case nir_intrinsic_load_base_vertex:
  69       reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
  70       if (reg->file == BAD_FILE)
  71          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
  72                                                  glsl_type::int_type);
  73       break;
  74
  75    case nir_intrinsic_load_instance_id:
  76       reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
  77       if (reg->file == BAD_FILE)
  78          *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
  79                                                  glsl_type::int_type);
  80       break;
  81
  82    default:
  83       break;
  84    }
  85 }
  86
  87 static bool
  88 setup_system_values_block(nir_block *block, void *void_visitor)
  89 {
  90    vec4_visitor *v = (vec4_visitor *)void_visitor;
  91
  92    nir_foreach_instr(block, instr) {
  93       if (instr->type != nir_instr_type_intrinsic)
  94          continue;
  95
  96       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  97       v->nir_setup_system_value_intrinsic(intrin);
  98    }
  99
 100    return true;
 101 }
 102
 103 void
 104 vec4_visitor::nir_setup_system_values(nir_shader *shader)
 105 {
 106    nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
 107
 108    nir_foreach_overload(shader, overload) {
 109       assert(strcmp(overload->function->name, "main") == 0);
 110       assert(overload->impl);
 111       nir_foreach_block(overload->impl, setup_system_values_block, this);
 112    }
 113 }
 114
 115 void
 116 vec4_visitor::nir_setup_inputs(nir_shader *shader)
 117 {
 118    nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
 119
 120    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
 121       int offset = var->data.driver_location;
 122       unsigned size = type_size(var->type);
 123       for (unsigned i = 0; i < size; i++) {
 124          src_reg src = src_reg(ATTR, var->data.location + i, var->type);
 125          nir_inputs[offset + i] = src;
 126       }
 127    }
 128 }
 129
 130 void
 131 vec4_visitor::nir_setup_uniforms(nir_shader *shader)
 132 {
 133    uniforms = 0;
 134
 135    nir_uniform_driver_location =
 136       rzalloc_array(mem_ctx, unsigned, this->uniform_array_size);
 137
 138    if (shader_prog) {
 139       foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
 140          /* UBO's, atomics and samplers don't take up space in the
 141             uniform file */
 142          if (var->interface_type != NULL || var->type->contains_atomic() ||
 143              type_size(var->type) == 0) {
 144             continue;
 145          }
 146
 147          assert(uniforms < uniform_array_size);
 148          this->uniform_size[uniforms] = type_size(var->type);
 149
 150          if (strncmp(var->name, "gl_", 3) == 0)
 151             nir_setup_builtin_uniform(var);
 152          else
 153             nir_setup_uniform(var);
 154       }
 155    } else {
 156       /* ARB_vertex_program is not supported yet */
 157       assert("Not implemented");
 158    }
 159 }
 160
 161 void
 162 vec4_visitor::nir_setup_uniform(nir_variable *var)
 163 {
 164    int namelen = strlen(var->name);
 165
 166    /* The data for our (non-builtin) uniforms is stored in a series of
 167     * gl_uniform_driver_storage structs for each subcomponent that
 168     * glGetUniformLocation() could name.  We know it's been set up in the same
 169     * order we'd walk the type, so walk the list of storage and find anything
 170     * with our name, or the prefix of a component that starts with our name.
 171     */
 172     for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
 173        struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 174
 175        if (storage->builtin)
 176           continue;
 177
 178        if (strncmp(var->name, storage->name, namelen) != 0 ||
 179            (storage->name[namelen] != 0 &&
 180             storage->name[namelen] != '.' &&
 181             storage->name[namelen] != '[')) {
 182           continue;
 183        }
 184
 185        gl_constant_value *components = storage->storage;
 186        unsigned vector_count = (MAX2(storage->array_elements, 1) *
 187                                 storage->type->matrix_columns);
 188
 189        for (unsigned s = 0; s < vector_count; s++) {
 190           assert(uniforms < uniform_array_size);
 191           uniform_vector_size[uniforms] = storage->type->vector_elements;
 192
 193           int i;
 194           for (i = 0; i < uniform_vector_size[uniforms]; i++) {
 195              stage_prog_data->param[uniforms * 4 + i] = components;
 196              components++;
 197           }
 198           for (; i < 4; i++) {
 199              static const gl_constant_value zero = { 0.0 };
 200              stage_prog_data->param[uniforms * 4 + i] = &zero;
 201           }
 202
 203           nir_uniform_driver_location[uniforms] = var->data.driver_location;
 204           uniforms++;
 205        }
 206     }
 207 }
 208
 209 void
 210 vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
 211 {
 212    const nir_state_slot *const slots = var->state_slots;
 213    assert(var->state_slots != NULL);
 214
 215    for (unsigned int i = 0; i < var->num_state_slots; i++) {
 216       /* This state reference has already been setup by ir_to_mesa,
 217        * but we'll get the same index back here.  We can reference
 218        * ParameterValues directly, since unlike brw_fs.cpp, we never
 219        * add new state references during compile.
 220        */
 221       int index = _mesa_add_state_reference(this->prog->Parameters,
 222                                             (gl_state_index *)slots[i].tokens);
 223       gl_constant_value *values =
 224          &this->prog->Parameters->ParameterValues[index][0];
 225
 226       assert(uniforms < uniform_array_size);
 227
 228       for (unsigned j = 0; j < 4; j++)
 229          stage_prog_data->param[uniforms * 4 + j] =
 230             &values[GET_SWZ(slots[i].swizzle, j)];
 231
 232       this->uniform_vector_size[uniforms] =
 233          (var->type->is_scalar() || var->type->is_vector() ||
 234           var->type->is_matrix() ? var->type->vector_elements : 4);
 235
 236       nir_uniform_driver_location[uniforms] = var->data.driver_location;
 237       uniforms++;
 238    }
 239 }
 240
 241 void
 242 vec4_visitor::nir_emit_impl(nir_function_impl *impl)
 243 {
 244    nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
 245
 246    foreach_list_typed(nir_register, reg, node, &impl->registers) {
 247       unsigned array_elems =
 248          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
 249
 250       nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems));
 251    }
 252
 253    nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
 254
 255    nir_emit_cf_list(&impl->body);
 256 }
 257
 258 void
 259 vec4_visitor::nir_emit_cf_list(exec_list *list)
 260 {
 261    exec_list_validate(list);
 262    foreach_list_typed(nir_cf_node, node, node, list) {
 263       switch (node->type) {
 264       case nir_cf_node_if:
 265          nir_emit_if(nir_cf_node_as_if(node));
 266          break;
 267
 268       case nir_cf_node_loop:
 269          nir_emit_loop(nir_cf_node_as_loop(node));
 270          break;
 271
 272       case nir_cf_node_block:
 273          nir_emit_block(nir_cf_node_as_block(node));
 274          break;
 275
 276       default:
 277          unreachable("Invalid CFG node block");
 278       }
 279    }
 280 }
 281
 282 void
 283 vec4_visitor::nir_emit_if(nir_if *if_stmt)
 284 {
 285    /* First, put the condition in f0 */
 286    src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
 287    vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
 288    inst->conditional_mod = BRW_CONDITIONAL_NZ;
 289
 290    emit(IF(BRW_PREDICATE_NORMAL));
 291
 292    nir_emit_cf_list(&if_stmt->then_list);
 293
 294    /* note: if the else is empty, dead CF elimination will remove it */
 295    emit(BRW_OPCODE_ELSE);
 296
 297    nir_emit_cf_list(&if_stmt->else_list);
 298
 299    emit(BRW_OPCODE_ENDIF);
 300 }
 301
 302 void
 303 vec4_visitor::nir_emit_loop(nir_loop *loop)
 304 {
 305    emit(BRW_OPCODE_DO);
 306
 307    nir_emit_cf_list(&loop->body);
 308
 309    emit(BRW_OPCODE_WHILE);
 310 }
 311
 312 void
 313 vec4_visitor::nir_emit_block(nir_block *block)
 314 {
 315    nir_foreach_instr(block, instr) {
 316       nir_emit_instr(instr);
 317    }
 318 }
 319
 320 void
 321 vec4_visitor::nir_emit_instr(nir_instr *instr)
 322 {
 323    this->base_ir = instr;
 324
 325    switch (instr->type) {
 326    case nir_instr_type_load_const:
 327       nir_emit_load_const(nir_instr_as_load_const(instr));
 328       break;
 329
 330    case nir_instr_type_intrinsic:
 331       nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
 332       break;
 333
 334    case nir_instr_type_alu:
 335       nir_emit_alu(nir_instr_as_alu(instr));
 336       break;
 337
 338    case nir_instr_type_jump:
 339       nir_emit_jump(nir_instr_as_jump(instr));
 340       break;
 341
 342    case nir_instr_type_tex:
 343       nir_emit_texture(nir_instr_as_tex(instr));
 344       break;
 345
 346    default:
 347       fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
 348       break;
 349    }
 350 }
 351
 352 static dst_reg
 353 dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
 354                     unsigned base_offset, nir_src *indirect)
 355 {
 356    dst_reg reg;
 357
 358    reg = v->nir_locals[nir_reg->index];
 359    reg = offset(reg, base_offset);
 360    if (indirect) {
 361       reg.reladdr =
 362          new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
 363                                                 BRW_REGISTER_TYPE_D,
 364                                                 1));
 365    }
 366    return reg;
 367 }
 368
 369 dst_reg
 370 vec4_visitor::get_nir_dest(nir_dest dest)
 371 {
 372    assert(!dest.is_ssa);
 373    return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
 374                               dest.reg.indirect);
 375 }
 376
 377 dst_reg
 378 vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
 379 {
 380    return retype(get_nir_dest(dest), type);
 381 }
 382
 383 dst_reg
 384 vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
 385 {
 386    return get_nir_dest(dest, brw_type_for_nir_type(type));
 387 }
 388
 389 src_reg
 390 vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
 391                           unsigned num_components)
 392 {
 393    dst_reg reg;
 394
 395    if (src.is_ssa) {
 396       assert(src.ssa != NULL);
 397       reg = nir_ssa_values[src.ssa->index];
 398    }
 399    else {
 400      reg = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
 401                                src.reg.indirect);
 402    }
 403
 404    reg = retype(reg, type);
 405
 406    src_reg reg_as_src = src_reg(reg);
 407    reg_as_src.swizzle = brw_swizzle_for_size(num_components);
 408    return reg_as_src;
 409 }
 410
 411 src_reg
 412 vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
 413                           unsigned num_components)
 414 {
 415    return get_nir_src(src, brw_type_for_nir_type(type), num_components);
 416 }
 417
 418 src_reg
 419 vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
 420 {
 421    /* if type is not specified, default to signed int */
 422    return get_nir_src(src, nir_type_int, num_components);
 423 }
 424
 425 void
 426 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 427 {
 428    dst_reg reg = dst_reg(GRF, alloc.allocate(1));
 429    reg.type =  BRW_REGISTER_TYPE_F;
 430
 431    /* @FIXME: consider emitting vector operations to save some MOVs in
 432     * cases where the components are representable in 8 bits.
 433     * By now, we emit a MOV for each component.
 434     */
 435    for (unsigned i = 0; i < instr->def.num_components; ++i) {
 436       reg.writemask = 1 << i;
 437       emit(MOV(reg, src_reg(instr->value.f[i])));
 438    }
 439
 440    /* Set final writemask */
 441    reg.writemask = brw_writemask_for_size(instr->def.num_components);
 442
 443    nir_ssa_values[instr->def.index] = reg;
 444 }
 445
 446 void
 447 vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 448 {
 449    dst_reg dest;
 450    src_reg src;
 451
 452    bool has_indirect = false;
 453
 454    switch (instr->intrinsic) {
 455
 456    case nir_intrinsic_load_input_indirect:
 457       has_indirect = true;
 458       /* fallthrough */
 459    case nir_intrinsic_load_input: {
 460       int offset = instr->const_index[0];
 461       src = nir_inputs[offset];
 462
 463       if (has_indirect) {
 464          dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0],
 465                                                          BRW_REGISTER_TYPE_D,
 466                                                          1));
 467       }
 468       dest = get_nir_dest(instr->dest, src.type);
 469       dest.writemask = brw_writemask_for_size(instr->num_components);
 470
 471       emit(MOV(dest, src));
 472       break;
 473    }
 474
 475    case nir_intrinsic_store_output_indirect:
 476       has_indirect = true;
 477       /* fallthrough */
 478    case nir_intrinsic_store_output: {
 479       int varying = instr->const_index[0];
 480
 481       src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
 482                         instr->num_components);
 483       dest = dst_reg(src);
 484
 485       if (has_indirect) {
 486          dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1],
 487                                                          BRW_REGISTER_TYPE_D,
 488                                                          1));
 489       }
 490       output_reg[varying] = dest;
 491       break;
 492    }
 493
 494    case nir_intrinsic_load_vertex_id:
 495       unreachable("should be lowered by lower_vertex_id()");
 496
 497    case nir_intrinsic_load_vertex_id_zero_base: {
 498       src_reg vertex_id =
 499          src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
 500       assert(vertex_id.file != BAD_FILE);
 501       dest = get_nir_dest(instr->dest, vertex_id.type);
 502       emit(MOV(dest, vertex_id));
 503       break;
 504    }
 505
 506    case nir_intrinsic_load_base_vertex: {
 507       src_reg base_vertex =
 508          src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
 509       assert(base_vertex.file != BAD_FILE);
 510       dest = get_nir_dest(instr->dest, base_vertex.type);
 511       emit(MOV(dest, base_vertex));
 512       break;
 513    }
 514
 515    case nir_intrinsic_load_instance_id: {
 516       src_reg instance_id =
 517          src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
 518       assert(instance_id.file != BAD_FILE);
 519       dest = get_nir_dest(instr->dest, instance_id.type);
 520       emit(MOV(dest, instance_id));
 521       break;
 522    }
 523
 524    case nir_intrinsic_load_uniform_indirect:
 525       has_indirect = true;
 526       /* fallthrough */
 527    case nir_intrinsic_load_uniform: {
 528       int uniform = instr->const_index[0];
 529
 530       dest = get_nir_dest(instr->dest);
 531
 532       if (has_indirect) {
 533          /* Split addressing into uniform and offset */
 534          int offset = uniform - nir_uniform_driver_location[uniform];
 535          assert(offset >= 0);
 536
 537          uniform -= offset;
 538          assert(uniform >= 0);
 539
 540          src = src_reg(dst_reg(UNIFORM, uniform));
 541          src.reg_offset = offset;
 542          src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
 543          src.reladdr = new(mem_ctx) src_reg(tmp);
 544       } else {
 545          src = src_reg(dst_reg(UNIFORM, uniform));
 546       }
 547
 548       emit(MOV(dest, src));
 549       break;
 550    }
 551
 552    case nir_intrinsic_atomic_counter_read:
 553    case nir_intrinsic_atomic_counter_inc:
 554    case nir_intrinsic_atomic_counter_dec: {
 555       unsigned surf_index = prog_data->base.binding_table.abo_start +
 556          (unsigned) instr->const_index[0];
 557       src_reg offset = get_nir_src(instr->src[0], nir_type_int,
 558                                    instr->num_components);
 559       dest = get_nir_dest(instr->dest);
 560
 561       switch (instr->intrinsic) {
 562          case nir_intrinsic_atomic_counter_inc:
 563             emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
 564                                 src_reg(), src_reg());
 565             break;
 566          case nir_intrinsic_atomic_counter_dec:
 567             emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
 568                                 src_reg(), src_reg());
 569             break;
 570          case nir_intrinsic_atomic_counter_read:
 571             emit_untyped_surface_read(surf_index, dest, offset);
 572             break;
 573          default:
 574             unreachable("Unreachable");
 575       }
 576
 577       brw_mark_surface_used(stage_prog_data, surf_index);
 578       break;
 579    }
 580
 581    case nir_intrinsic_load_ubo_indirect:
 582       has_indirect = true;
 583       /* fallthrough */
 584    case nir_intrinsic_load_ubo: {
 585       nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
 586       src_reg surf_index;
 587
 588       dest = get_nir_dest(instr->dest);
 589
 590       if (const_block_index) {
 591          /* The block index is a constant, so just emit the binding table entry
 592           * as an immediate.
 593           */
 594          surf_index = src_reg(prog_data->base.binding_table.ubo_start +
 595                               const_block_index->u[0]);
 596       } else {
 597          /* The block index is not a constant. Evaluate the index expression
 598           * per-channel and add the base UBO index; we have to select a value
 599           * from any live channel.
 600           */
 601          surf_index = src_reg(this, glsl_type::uint_type);
 602          emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
 603                                                    instr->num_components),
 604                   src_reg(prog_data->base.binding_table.ubo_start)));
 605          surf_index = emit_uniformize(surf_index);
 606
 607          /* Assume this may touch any UBO. It would be nice to provide
 608           * a tighter bound, but the array information is already lowered away.
 609           */
 610          brw_mark_surface_used(&prog_data->base,
 611                                prog_data->base.binding_table.ubo_start +
 612                                shader_prog->NumUniformBlocks - 1);
 613       }
 614
 615       unsigned const_offset = instr->const_index[0];
 616       src_reg offset;
 617
 618       if (!has_indirect)  {
 619          offset = src_reg(const_offset / 16);
 620       } else {
 621          offset = src_reg(this, glsl_type::uint_type);
 622          emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
 623                   src_reg(4u)));
 624       }
 625
 626       src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
 627       packed_consts.type = dest.type;
 628
 629       emit_pull_constant_load_reg(dst_reg(packed_consts),
 630                                   surf_index,
 631                                   offset,
 632                                   NULL, NULL /* before_block/inst */);
 633
 634       packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
 635       packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
 636                                             const_offset % 16 / 4,
 637                                             const_offset % 16 / 4,
 638                                             const_offset % 16 / 4);
 639
 640       emit(MOV(dest, packed_consts));
 641       break;
 642    }
 643
 644    default:
 645       unreachable("Unknown intrinsic");
 646    }
 647 }
 648
 649 static unsigned
 650 brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
 651 {
 652    return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
 653 }
 654
 655 static enum brw_conditional_mod
 656 brw_conditional_for_nir_comparison(nir_op op)
 657 {
 658    switch (op) {
 659    case nir_op_flt:
 660    case nir_op_ilt:
 661    case nir_op_ult:
 662       return BRW_CONDITIONAL_L;
 663
 664    case nir_op_fge:
 665    case nir_op_ige:
 666    case nir_op_uge:
 667       return BRW_CONDITIONAL_GE;
 668
 669    case nir_op_feq:
 670    case nir_op_ieq:
 671    case nir_op_ball_fequal2:
 672    case nir_op_ball_iequal2:
 673    case nir_op_ball_fequal3:
 674    case nir_op_ball_iequal3:
 675    case nir_op_ball_fequal4:
 676    case nir_op_ball_iequal4:
 677       return BRW_CONDITIONAL_Z;
 678
 679    case nir_op_fne:
 680    case nir_op_ine:
 681    case nir_op_bany_fnequal2:
 682    case nir_op_bany_inequal2:
 683    case nir_op_bany_fnequal3:
 684    case nir_op_bany_inequal3:
 685    case nir_op_bany_fnequal4:
 686    case nir_op_bany_inequal4:
 687       return BRW_CONDITIONAL_NZ;
 688
 689    default:
 690       unreachable("not reached: bad operation for comparison");
 691    }
 692 }
 693
 694 void
 695 vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 696 {
 697    vec4_instruction *inst;
 698
 699    dst_reg dst = get_nir_dest(instr->dest.dest,
 700                               nir_op_infos[instr->op].output_type);
 701    dst.writemask = instr->dest.write_mask;
 702
 703    src_reg op[4];
 704    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
 705       op[i] = get_nir_src(instr->src[i].src,
 706                           nir_op_infos[instr->op].input_types[i], 4);
 707       op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle);
 708       op[i].abs = instr->src[i].abs;
 709       op[i].negate = instr->src[i].negate;
 710    }
 711
 712    switch (instr->op) {
 713    case nir_op_imov:
 714    case nir_op_fmov:
 715       inst = emit(MOV(dst, op[0]));
 716       inst->saturate = instr->dest.saturate;
 717       break;
 718
 719    case nir_op_vec2:
 720    case nir_op_vec3:
 721    case nir_op_vec4:
 722       unreachable("not reached: should be handled by lower_vec_to_movs()");
 723
 724    case nir_op_i2f:
 725    case nir_op_u2f:
 726       inst = emit(MOV(dst, op[0]));
 727       inst->saturate = instr->dest.saturate;
 728       break;
 729
 730    case nir_op_f2i:
 731    case nir_op_f2u:
 732       inst = emit(MOV(dst, op[0]));
 733       break;
 734
 735    case nir_op_fadd:
 736       /* fall through */
 737    case nir_op_iadd:
 738       inst = emit(ADD(dst, op[0], op[1]));
 739       inst->saturate = instr->dest.saturate;
 740       break;
 741
 742    case nir_op_fmul:
 743       inst = emit(MUL(dst, op[0], op[1]));
 744       inst->saturate = instr->dest.saturate;
 745       break;
 746
 747    case nir_op_imul: {
 748       nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
 749       nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
 750
 751       /* For integer multiplication, the MUL uses the low 16 bits of one of
 752        * the operands (src0 through SNB, src1 on IVB and later). The MACH
 753        * accumulates in the contribution of the upper 16 bits of that
 754        * operand. If we can determine that one of the args is in the low
 755        * 16 bits, though, we can just emit a single MUL.
 756        */
 757       if (value0 && value0->u[0] < (1 << 16)) {
 758          if (devinfo->gen < 7)
 759             emit(MUL(dst, op[0], op[1]));
 760          else
 761             emit(MUL(dst, op[1], op[0]));
 762       } else if (value1 && value1->u[0] < (1 << 16)) {
 763          if (devinfo->gen < 7)
 764             emit(MUL(dst, op[1], op[0]));
 765          else
 766             emit(MUL(dst, op[0], op[1]));
 767       } else {
 768          struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 769
 770          emit(MUL(acc, op[0], op[1]));
 771          emit(MACH(dst_null_d(), op[0], op[1]));
 772          emit(MOV(dst, src_reg(acc)));
 773       }
 774       break;
 775    }
 776
 777    case nir_op_imul_high:
 778    case nir_op_umul_high: {
 779       struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
 780
 781       emit(MUL(acc, op[0], op[1]));
 782       emit(MACH(dst, op[0], op[1]));
 783       break;
 784    }
 785
 786    case nir_op_frcp:
 787       inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]);
 788       inst->saturate = instr->dest.saturate;
 789       break;
 790
 791    case nir_op_fexp2:
 792       inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]);
 793       inst->saturate = instr->dest.saturate;
 794       break;
 795
 796    case nir_op_flog2:
 797       inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]);
 798       inst->saturate = instr->dest.saturate;
 799       break;
 800
 801    case nir_op_fsin:
 802       inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
 803       inst->saturate = instr->dest.saturate;
 804       break;
 805
 806    case nir_op_fcos:
 807       inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
 808       inst->saturate = instr->dest.saturate;
 809       break;
 810
 811    case nir_op_idiv:
 812    case nir_op_udiv:
 813       emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]);
 814       break;
 815
 816    case nir_op_umod:
 817       emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
 818       break;
 819
 820    case nir_op_ldexp:
 821       unreachable("not reached: should be handled by ldexp_to_arith()");
 822
 823    case nir_op_fsqrt:
 824       inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]);
 825       inst->saturate = instr->dest.saturate;
 826       break;
 827
 828    case nir_op_frsq:
 829       inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]);
 830       inst->saturate = instr->dest.saturate;
 831       break;
 832
 833    case nir_op_fpow:
 834       inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]);
 835       inst->saturate = instr->dest.saturate;
 836       break;
 837
 838    case nir_op_uadd_carry: {
 839       struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 840
 841       emit(ADDC(dst_null_ud(), op[0], op[1]));
 842       emit(MOV(dst, src_reg(acc)));
 843       break;
 844    }
 845
 846    case nir_op_usub_borrow: {
 847       struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 848
 849       emit(SUBB(dst_null_ud(), op[0], op[1]));
 850       emit(MOV(dst, src_reg(acc)));
 851       break;
 852    }
 853
 854    case nir_op_ftrunc:
 855       inst = emit(RNDZ(dst, op[0]));
 856       inst->saturate = instr->dest.saturate;
 857       break;
 858
 859    case nir_op_fceil: {
 860       src_reg tmp = src_reg(this, glsl_type::float_type);
 861       tmp.swizzle =
 862          brw_swizzle_for_size(instr->src[0].src.is_ssa ?
 863                               instr->src[0].src.ssa->num_components :
 864                               instr->src[0].src.reg.reg->num_components);
 865
 866       op[0].negate = !op[0].negate;
 867       emit(RNDD(dst_reg(tmp), op[0]));
 868       tmp.negate = true;
 869       inst = emit(MOV(dst, tmp));
 870       inst->saturate = instr->dest.saturate;
 871       break;
 872    }
 873
 874    case nir_op_ffloor:
 875       inst = emit(RNDD(dst, op[0]));
 876       inst->saturate = instr->dest.saturate;
 877       break;
 878
 879    case nir_op_ffract:
 880       inst = emit(FRC(dst, op[0]));
 881       inst->saturate = instr->dest.saturate;
 882       break;
 883
 884    case nir_op_fround_even:
 885       inst = emit(RNDE(dst, op[0]));
 886       inst->saturate = instr->dest.saturate;
 887       break;
 888
 889    case nir_op_fmin:
 890    case nir_op_imin:
 891    case nir_op_umin:
 892       inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
 893       inst->saturate = instr->dest.saturate;
 894       break;
 895
 896    case nir_op_fmax:
 897    case nir_op_imax:
 898    case nir_op_umax:
 899       inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
 900       inst->saturate = instr->dest.saturate;
 901       break;
 902
 903    case nir_op_fddx:
 904    case nir_op_fddx_coarse:
 905    case nir_op_fddx_fine:
 906    case nir_op_fddy:
 907    case nir_op_fddy_coarse:
 908    case nir_op_fddy_fine:
 909       unreachable("derivatives are not valid in vertex shaders");
 910
 911    case nir_op_flt:
 912    case nir_op_ilt:
 913    case nir_op_ult:
 914    case nir_op_fge:
 915    case nir_op_ige:
 916    case nir_op_uge:
 917    case nir_op_feq:
 918    case nir_op_ieq:
 919    case nir_op_fne:
 920    case nir_op_ine:
 921       emit(CMP(dst, op[0], op[1],
 922                brw_conditional_for_nir_comparison(instr->op)));
 923       break;
 924
 925    case nir_op_ball_fequal2:
 926    case nir_op_ball_iequal2:
 927    case nir_op_ball_fequal3:
 928    case nir_op_ball_iequal3:
 929    case nir_op_ball_fequal4:
 930    case nir_op_ball_iequal4: {
 931       dst_reg tmp = dst_reg(this, glsl_type::bool_type);
 932
 933       switch (instr->op) {
 934       case nir_op_ball_fequal2:
 935       case nir_op_ball_iequal2:
 936          tmp.writemask = WRITEMASK_XY;
 937          break;
 938       case nir_op_ball_fequal3:
 939       case nir_op_ball_iequal3:
 940          tmp.writemask = WRITEMASK_XYZ;
 941          break;
 942       case nir_op_ball_fequal4:
 943       case nir_op_ball_iequal4:
 944          tmp.writemask = WRITEMASK_XYZW;
 945          break;
 946       default:
 947          unreachable("not reached");
 948       }
 949
 950       emit(CMP(tmp, op[0], op[1],
 951                brw_conditional_for_nir_comparison(instr->op)));
 952       emit(MOV(dst, src_reg(0)));
 953       inst = emit(MOV(dst, src_reg(~0)));
 954       inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
 955       break;
 956    }
 957
 958    case nir_op_bany_fnequal2:
 959    case nir_op_bany_inequal2:
 960    case nir_op_bany_fnequal3:
 961    case nir_op_bany_inequal3:
 962    case nir_op_bany_fnequal4:
 963    case nir_op_bany_inequal4: {
 964       dst_reg tmp = dst_reg(this, glsl_type::bool_type);
 965
 966       switch (instr->op) {
 967       case nir_op_bany_fnequal2:
 968       case nir_op_bany_inequal2:
 969          tmp.writemask = WRITEMASK_XY;
 970          break;
 971       case nir_op_bany_fnequal3:
 972       case nir_op_bany_inequal3:
 973          tmp.writemask = WRITEMASK_XYZ;
 974          break;
 975       case nir_op_bany_fnequal4:
 976       case nir_op_bany_inequal4:
 977          tmp.writemask = WRITEMASK_XYZW;
 978          break;
 979       default:
 980          unreachable("not reached");
 981       }
 982
 983       emit(CMP(tmp, op[0], op[1],
 984                brw_conditional_for_nir_comparison(instr->op)));
 985
 986       emit(MOV(dst, src_reg(0)));
 987       inst = emit(MOV(dst, src_reg(~0)));
 988       inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 989       break;
 990    }
 991
 992    case nir_op_inot:
 993       emit(NOT(dst, op[0]));
 994       break;
 995
 996    case nir_op_ixor:
 997       emit(XOR(dst, op[0], op[1]));
 998       break;
 999
1000    case nir_op_ior:
1001       emit(OR(dst, op[0], op[1]));
1002       break;
1003
1004    case nir_op_iand:
1005       emit(AND(dst, op[0], op[1]));
1006       break;
1007
1008    case nir_op_b2i:
1009       emit(AND(dst, op[0], src_reg(1)));
1010       break;
1011
1012    case nir_op_b2f:
1013       op[0].type = BRW_REGISTER_TYPE_D;
1014       dst.type = BRW_REGISTER_TYPE_D;
1015       emit(AND(dst, op[0], src_reg(0x3f800000u)));
1016       dst.type = BRW_REGISTER_TYPE_F;
1017       break;
1018
1019    case nir_op_f2b:
1020       emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1021       break;
1022
1023    case nir_op_i2b:
1024       emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1025       break;
1026
1027    case nir_op_fnoise1_1:
1028    case nir_op_fnoise1_2:
1029    case nir_op_fnoise1_3:
1030    case nir_op_fnoise1_4:
1031    case nir_op_fnoise2_1:
1032    case nir_op_fnoise2_2:
1033    case nir_op_fnoise2_3:
1034    case nir_op_fnoise2_4:
1035    case nir_op_fnoise3_1:
1036    case nir_op_fnoise3_2:
1037    case nir_op_fnoise3_3:
1038    case nir_op_fnoise3_4:
1039    case nir_op_fnoise4_1:
1040    case nir_op_fnoise4_2:
1041    case nir_op_fnoise4_3:
1042    case nir_op_fnoise4_4:
1043       unreachable("not reached: should be handled by lower_noise");
1044
1045    case nir_op_unpack_half_2x16_split_x:
1046    case nir_op_unpack_half_2x16_split_y:
1047    case nir_op_pack_half_2x16_split:
1048       unreachable("not reached: should not occur in vertex shader");
1049
1050    case nir_op_unpack_snorm_2x16:
1051    case nir_op_unpack_unorm_2x16:
1052    case nir_op_pack_snorm_2x16:
1053    case nir_op_pack_unorm_2x16:
1054       unreachable("not reached: should be handled by lower_packing_builtins");
1055
1056    case nir_op_unpack_half_2x16:
1057       /* As NIR does not guarantee that we have a correct swizzle outside the
1058        * boundaries of a vector, and the implementation of emit_unpack_half_2x16
1059        * uses the source operand in an operation with WRITEMASK_Y while our
1060        * source operand has only size 1, it accessed incorrect data producing
1061        * regressions in Piglit. We repeat the swizzle of the first component on the
1062        * rest of components to avoid regressions. In the vec4_visitor IR code path
1063        * this is not needed because the operand has already the correct swizzle.
1064        */
1065       op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle);
1066       emit_unpack_half_2x16(dst, op[0]);
1067       break;
1068
1069    case nir_op_pack_half_2x16:
1070       emit_pack_half_2x16(dst, op[0]);
1071       break;
1072
1073    case nir_op_unpack_unorm_4x8:
1074       emit_unpack_unorm_4x8(dst, op[0]);
1075       break;
1076
1077    case nir_op_pack_unorm_4x8:
1078       emit_pack_unorm_4x8(dst, op[0]);
1079       break;
1080
1081    case nir_op_unpack_snorm_4x8:
1082       emit_unpack_snorm_4x8(dst, op[0]);
1083       break;
1084
1085    case nir_op_pack_snorm_4x8:
1086       emit_pack_snorm_4x8(dst, op[0]);
1087       break;
1088
1089    case nir_op_bitfield_reverse:
1090       emit(BFREV(dst, op[0]));
1091       break;
1092
1093    case nir_op_bit_count:
1094       emit(CBIT(dst, op[0]));
1095       break;
1096
1097    case nir_op_ufind_msb:
1098    case nir_op_ifind_msb: {
1099       src_reg temp = src_reg(this, glsl_type::uint_type);
1100
1101       inst = emit(FBH(dst_reg(temp), op[0]));
1102       inst->dst.writemask = WRITEMASK_XYZW;
1103
1104       /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1105        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1106        * subtract the result from 31 to convert the MSB count into an LSB count.
1107        */
1108
1109       /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1110       temp.swizzle = BRW_SWIZZLE_NOOP;
1111       emit(MOV(dst, temp));
1112
1113       src_reg src_tmp = src_reg(dst);
1114       emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
1115
1116       src_tmp.negate = true;
1117       inst = emit(ADD(dst, src_tmp, src_reg(31)));
1118       inst->predicate = BRW_PREDICATE_NORMAL;
1119       break;
1120    }
1121
1122    case nir_op_find_lsb:
1123       emit(FBL(dst, op[0]));
1124       break;
1125
1126    case nir_op_ubitfield_extract:
1127    case nir_op_ibitfield_extract:
1128       op[0] = fix_3src_operand(op[0]);
1129       op[1] = fix_3src_operand(op[1]);
1130       op[2] = fix_3src_operand(op[2]);
1131
1132       emit(BFE(dst, op[2], op[1], op[0]));
1133       break;
1134
1135    case nir_op_bfm:
1136       emit(BFI1(dst, op[0], op[1]));
1137       break;
1138
1139    case nir_op_bfi:
1140       op[0] = fix_3src_operand(op[0]);
1141       op[1] = fix_3src_operand(op[1]);
1142       op[2] = fix_3src_operand(op[2]);
1143
1144       emit(BFI2(dst, op[0], op[1], op[2]));
1145       break;
1146
1147    case nir_op_bitfield_insert:
1148       unreachable("not reached: should be handled by "
1149                   "lower_instructions::bitfield_insert_to_bfm_bfi");
1150
1151    case nir_op_fsign:
1152       /* AND(val, 0x80000000) gives the sign bit.
1153        *
1154        * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1155        * zero.
1156        */
1157       emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
1158
1159       op[0].type = BRW_REGISTER_TYPE_UD;
1160       dst.type = BRW_REGISTER_TYPE_UD;
1161       emit(AND(dst, op[0], src_reg(0x80000000u)));
1162
1163       inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u)));
1164       inst->predicate = BRW_PREDICATE_NORMAL;
1165       dst.type = BRW_REGISTER_TYPE_F;
1166
1167       if (instr->dest.saturate) {
1168          inst = emit(MOV(dst, src_reg(dst)));
1169          inst->saturate = true;
1170       }
1171       break;
1172
1173    case nir_op_isign:
1174       /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1175        *               -> non-negative val generates 0x00000000.
1176        *  Predicated OR sets 1 if val is positive.
1177        */
1178       emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
1179       emit(ASR(dst, op[0], src_reg(31)));
1180       inst = emit(OR(dst, src_reg(dst), src_reg(1)));
1181       inst->predicate = BRW_PREDICATE_NORMAL;
1182       break;
1183
1184    case nir_op_ishl:
1185       emit(SHL(dst, op[0], op[1]));
1186       break;
1187
1188    case nir_op_ishr:
1189       emit(ASR(dst, op[0], op[1]));
1190       break;
1191
1192    case nir_op_ushr:
1193       emit(SHR(dst, op[0], op[1]));
1194       break;
1195
1196    case nir_op_ffma:
1197       op[0] = fix_3src_operand(op[0]);
1198       op[1] = fix_3src_operand(op[1]);
1199       op[2] = fix_3src_operand(op[2]);
1200
1201       inst = emit(MAD(dst, op[2], op[1], op[0]));
1202       inst->saturate = instr->dest.saturate;
1203       break;
1204
1205    case nir_op_flrp:
1206       inst = emit_lrp(dst, op[0], op[1], op[2]);
1207       inst->saturate = instr->dest.saturate;
1208       break;
1209
1210    case nir_op_bcsel:
1211       emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
1212       inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
1213       inst->predicate = BRW_PREDICATE_NORMAL;
1214       break;
1215
1216    default:
1217       unreachable("Unimplemented ALU operation");
1218    }
1219 }
1220
1221 void
1222 vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
1223 {
1224    /* @TODO: Not yet implemented. For now, jump instructions emit nothing. */
1225 }
1226
1227 void
1228 vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
1229 {
1230    /* @TODO: Not yet implemented. For now, texture instructions emit nothing. */
1231 }
1232
1233 }