src/compiler/nir/nir_lower_io.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Connor Abbott (cwabbott0@gmail.com)
  25  *    Jason Ekstrand (jason@jlekstrand.net)
  26  *
  27  */
  28
/*
 * This lowering pass converts variable-based accesses (load_var, store_var
 * and the var_atomic_* intrinsics) on input, output, uniform and shared
 * variables into the corresponding offset-based I/O intrinsics.
 */
  33
  34 #include "nir.h"
  35 #include "nir_builder.h"
  36
  37 struct lower_io_state {
  38    nir_builder builder;
  39    void *mem_ctx;
  40    int (*type_size)(const struct glsl_type *type);
  41    nir_variable_mode mode;
  42 };
  43
  44 void
  45 nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
  46                          int (*type_size)(const struct glsl_type *))
  47 {
  48    unsigned location = 0;
  49
  50    nir_foreach_variable(var, var_list) {
  51       /*
  52        * UBO's have their own address spaces, so don't count them towards the
  53        * number of global uniforms
  54        */
  55       if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
  56           var->interface_type != NULL)
  57          continue;
  58
  59       var->data.driver_location = location;
  60       location += type_size(var->type);
  61    }
  62
  63    *size = location;
  64 }
  65
  66 /**
  67  * Returns true if we're processing a stage whose inputs are arrays indexed
  68  * by a vertex number (such as geometry shader inputs).
  69  */
  70 static bool
  71 is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
  72 {
  73    gl_shader_stage stage = state->builder.shader->stage;
  74
  75    return var->data.mode == nir_var_shader_in && !var->data.patch &&
  76           (stage == MESA_SHADER_TESS_CTRL ||
  77            stage == MESA_SHADER_TESS_EVAL ||
  78            stage == MESA_SHADER_GEOMETRY);
  79 }
  80
  81 static bool
  82 is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
  83 {
  84    gl_shader_stage stage = state->builder.shader->stage;
  85    return var->data.mode == nir_var_shader_out && !var->data.patch &&
  86           stage == MESA_SHADER_TESS_CTRL;
  87 }
  88
  89 static nir_ssa_def *
  90 get_io_offset(nir_builder *b, nir_deref_var *deref,
  91               nir_ssa_def **vertex_index,
  92               int (*type_size)(const struct glsl_type *))
  93 {
  94    nir_deref *tail = &deref->deref;
  95
  96    /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
  97     * outermost array index separate.  Process the rest normally.
  98     */
  99    if (vertex_index != NULL) {
 100       tail = tail->child;
 101       assert(tail->deref_type == nir_deref_type_array);
 102       nir_deref_array *deref_array = nir_deref_as_array(tail);
 103
 104       nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
 105       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
 106          vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
 107       }
 108       *vertex_index = vtx;
 109    }
 110
 111    /* Just emit code and let constant-folding go to town */
 112    nir_ssa_def *offset = nir_imm_int(b, 0);
 113
 114    while (tail->child != NULL) {
 115       const struct glsl_type *parent_type = tail->type;
 116       tail = tail->child;
 117
 118       if (tail->deref_type == nir_deref_type_array) {
 119          nir_deref_array *deref_array = nir_deref_as_array(tail);
 120          unsigned size = type_size(tail->type);
 121
 122          offset = nir_iadd(b, offset,
 123                            nir_imm_int(b, size * deref_array->base_offset));
 124
 125          if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
 126             nir_ssa_def *mul =
 127                nir_imul(b, nir_imm_int(b, size),
 128                         nir_ssa_for_src(b, deref_array->indirect, 1));
 129
 130             offset = nir_iadd(b, offset, mul);
 131          }
 132       } else if (tail->deref_type == nir_deref_type_struct) {
 133          nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
 134
 135          unsigned field_offset = 0;
 136          for (unsigned i = 0; i < deref_struct->index; i++) {
 137             field_offset += type_size(glsl_get_struct_field(parent_type, i));
 138          }
 139          offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
 140       }
 141    }
 142
 143    return offset;
 144 }
 145
 146 static nir_intrinsic_op
 147 load_op(struct lower_io_state *state,
 148         nir_variable_mode mode, bool per_vertex)
 149 {
 150    nir_intrinsic_op op;
 151    switch (mode) {
 152    case nir_var_shader_in:
 153       op = per_vertex ? nir_intrinsic_load_per_vertex_input :
 154                         nir_intrinsic_load_input;
 155       break;
 156    case nir_var_shader_out:
 157       op = per_vertex ? nir_intrinsic_load_per_vertex_output :
 158                         nir_intrinsic_load_output;
 159       break;
 160    case nir_var_uniform:
 161       op = nir_intrinsic_load_uniform;
 162       break;
 163    case nir_var_shared:
 164       op = nir_intrinsic_load_shared;
 165       break;
 166    default:
 167       unreachable("Unknown variable mode");
 168    }
 169    return op;
 170 }
 171
 172 static nir_intrinsic_op
 173 store_op(struct lower_io_state *state,
 174          nir_variable_mode mode, bool per_vertex)
 175 {
 176    nir_intrinsic_op op;
 177    switch (mode) {
 178    case nir_var_shader_in:
 179    case nir_var_shader_out:
 180       op = per_vertex ? nir_intrinsic_store_per_vertex_output :
 181                         nir_intrinsic_store_output;
 182       break;
 183    case nir_var_shared:
 184       op = nir_intrinsic_store_shared;
 185       break;
 186    default:
 187       unreachable("Unknown variable mode");
 188    }
 189    return op;
 190 }
 191
 192 static nir_intrinsic_op
 193 atomic_op(nir_intrinsic_op opcode)
 194 {
 195    switch (opcode) {
 196 #define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O;
 197    OP(atomic_exchange)
 198    OP(atomic_comp_swap)
 199    OP(atomic_add)
 200    OP(atomic_imin)
 201    OP(atomic_umin)
 202    OP(atomic_imax)
 203    OP(atomic_umax)
 204    OP(atomic_and)
 205    OP(atomic_or)
 206    OP(atomic_xor)
 207 #undef OP
 208    default:
 209       unreachable("Invalid atomic");
 210    }
 211 }
 212
 213 static bool
 214 nir_lower_io_block(nir_block *block, void *void_state)
 215 {
 216    struct lower_io_state *state = void_state;
 217
 218    nir_builder *b = &state->builder;
 219
 220    nir_foreach_instr_safe(block, instr) {
 221       if (instr->type != nir_instr_type_intrinsic)
 222          continue;
 223
 224       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 225
 226       switch (intrin->intrinsic) {
 227       case nir_intrinsic_load_var:
 228       case nir_intrinsic_store_var:
 229       case nir_intrinsic_var_atomic_add:
 230       case nir_intrinsic_var_atomic_imin:
 231       case nir_intrinsic_var_atomic_umin:
 232       case nir_intrinsic_var_atomic_imax:
 233       case nir_intrinsic_var_atomic_umax:
 234       case nir_intrinsic_var_atomic_and:
 235       case nir_intrinsic_var_atomic_or:
 236       case nir_intrinsic_var_atomic_xor:
 237       case nir_intrinsic_var_atomic_exchange:
 238       case nir_intrinsic_var_atomic_comp_swap:
 239          /* We can lower the io for this nir instrinsic */
 240          break;
 241       default:
 242          /* We can't lower the io for this nir instrinsic, so skip it */
 243          continue;
 244       }
 245
 246       nir_variable_mode mode = intrin->variables[0]->var->data.mode;
 247
 248       if (state->mode != nir_var_all && state->mode != mode)
 249          continue;
 250
 251       if (mode != nir_var_shader_in &&
 252           mode != nir_var_shader_out &&
 253           mode != nir_var_shared &&
 254           mode != nir_var_uniform)
 255          continue;
 256
 257       b->cursor = nir_before_instr(instr);
 258
 259       switch (intrin->intrinsic) {
 260       case nir_intrinsic_load_var: {
 261          bool per_vertex =
 262             is_per_vertex_input(state, intrin->variables[0]->var) ||
 263             is_per_vertex_output(state, intrin->variables[0]->var);
 264
 265          nir_ssa_def *offset;
 266          nir_ssa_def *vertex_index;
 267
 268          offset = get_io_offset(b, intrin->variables[0],
 269                                 per_vertex ? &vertex_index : NULL,
 270                                 state->type_size);
 271
 272          nir_intrinsic_instr *load =
 273             nir_intrinsic_instr_create(state->mem_ctx,
 274                                        load_op(state, mode, per_vertex));
 275          load->num_components = intrin->num_components;
 276
 277          load->const_index[0] =
 278             intrin->variables[0]->var->data.driver_location;
 279
 280          if (load->intrinsic == nir_intrinsic_load_uniform) {
 281             load->const_index[1] =
 282                state->type_size(intrin->variables[0]->var->type);
 283          }
 284
 285          if (per_vertex)
 286             load->src[0] = nir_src_for_ssa(vertex_index);
 287
 288          load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
 289
 290          if (intrin->dest.is_ssa) {
 291             nir_ssa_dest_init(&load->instr, &load->dest,
 292                               intrin->num_components, NULL);
 293             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
 294                                      nir_src_for_ssa(&load->dest.ssa));
 295          } else {
 296             nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
 297          }
 298
 299          nir_instr_insert_before(&intrin->instr, &load->instr);
 300          nir_instr_remove(&intrin->instr);
 301          break;
 302       }
 303
 304       case nir_intrinsic_store_var: {
 305          assert(mode == nir_var_shader_out || mode == nir_var_shared);
 306
 307          nir_ssa_def *offset;
 308          nir_ssa_def *vertex_index;
 309
 310          bool per_vertex =
 311             is_per_vertex_output(state, intrin->variables[0]->var);
 312
 313          offset = get_io_offset(b, intrin->variables[0],
 314                                 per_vertex ? &vertex_index : NULL,
 315                                 state->type_size);
 316
 317          nir_intrinsic_instr *store =
 318             nir_intrinsic_instr_create(state->mem_ctx,
 319                                        store_op(state, mode, per_vertex));
 320          store->num_components = intrin->num_components;
 321
 322          nir_src_copy(&store->src[0], &intrin->src[0], store);
 323
 324          store->const_index[0] =
 325             intrin->variables[0]->var->data.driver_location;
 326
 327          /* Copy the writemask */
 328          store->const_index[1] = intrin->const_index[0];
 329
 330          if (per_vertex)
 331             store->src[1] = nir_src_for_ssa(vertex_index);
 332
 333          store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
 334
 335          nir_instr_insert_before(&intrin->instr, &store->instr);
 336          nir_instr_remove(&intrin->instr);
 337          break;
 338       }
 339
 340       case nir_intrinsic_var_atomic_add:
 341       case nir_intrinsic_var_atomic_imin:
 342       case nir_intrinsic_var_atomic_umin:
 343       case nir_intrinsic_var_atomic_imax:
 344       case nir_intrinsic_var_atomic_umax:
 345       case nir_intrinsic_var_atomic_and:
 346       case nir_intrinsic_var_atomic_or:
 347       case nir_intrinsic_var_atomic_xor:
 348       case nir_intrinsic_var_atomic_exchange:
 349       case nir_intrinsic_var_atomic_comp_swap: {
 350          assert(mode == nir_var_shared);
 351
 352          nir_ssa_def *offset;
 353
 354          offset = get_io_offset(b, intrin->variables[0],
 355                                 NULL, state->type_size);
 356
 357          nir_intrinsic_instr *atomic =
 358             nir_intrinsic_instr_create(state->mem_ctx,
 359                                        atomic_op(intrin->intrinsic));
 360
 361          atomic->src[0] = nir_src_for_ssa(offset);
 362
 363          atomic->const_index[0] =
 364             intrin->variables[0]->var->data.driver_location;
 365
 366          nir_src_copy(&atomic->src[1], &intrin->src[0], atomic);
 367
 368          if (intrin->intrinsic == nir_intrinsic_var_atomic_comp_swap)
 369             nir_src_copy(&atomic->src[2], &intrin->src[1], atomic);
 370
 371          if (intrin->dest.is_ssa) {
 372             nir_ssa_dest_init(&atomic->instr, &atomic->dest,
 373                               intrin->dest.ssa.num_components, NULL);
 374             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
 375                                      nir_src_for_ssa(&atomic->dest.ssa));
 376          } else {
 377             nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx);
 378          }
 379
 380          nir_instr_insert_before(&intrin->instr, &atomic->instr);
 381          nir_instr_remove(&intrin->instr);
 382          break;
 383       }
 384
 385       default:
 386          break;
 387       }
 388    }
 389
 390    return true;
 391 }
 392
 393 static void
 394 nir_lower_io_impl(nir_function_impl *impl,
 395                   nir_variable_mode mode,
 396                   int (*type_size)(const struct glsl_type *))
 397 {
 398    struct lower_io_state state;
 399
 400    nir_builder_init(&state.builder, impl);
 401    state.mem_ctx = ralloc_parent(impl);
 402    state.mode = mode;
 403    state.type_size = type_size;
 404
 405    nir_foreach_block(impl, nir_lower_io_block, &state);
 406
 407    nir_metadata_preserve(impl, nir_metadata_block_index |
 408                                nir_metadata_dominance);
 409 }
 410
 411 void
 412 nir_lower_io(nir_shader *shader, nir_variable_mode mode,
 413              int (*type_size)(const struct glsl_type *))
 414 {
 415    nir_foreach_function(shader, function) {
 416       if (function->impl)
 417          nir_lower_io_impl(function->impl, mode, type_size);
 418    }
 419 }
 420
 421 /**
 422  * Return the offset soruce for a load/store intrinsic.
 423  */
 424 nir_src *
 425 nir_get_io_offset_src(nir_intrinsic_instr *instr)
 426 {
 427    switch (instr->intrinsic) {
 428    case nir_intrinsic_load_input:
 429    case nir_intrinsic_load_output:
 430    case nir_intrinsic_load_uniform:
 431       return &instr->src[0];
 432    case nir_intrinsic_load_ubo:
 433    case nir_intrinsic_load_ssbo:
 434    case nir_intrinsic_load_per_vertex_input:
 435    case nir_intrinsic_load_per_vertex_output:
 436    case nir_intrinsic_store_output:
 437       return &instr->src[1];
 438    case nir_intrinsic_store_ssbo:
 439    case nir_intrinsic_store_per_vertex_output:
 440       return &instr->src[2];
 441    default:
 442       return NULL;
 443    }
 444 }
 445
 446 /**
 447  * Return the vertex index source for a load/store per_vertex intrinsic.
 448  */
 449 nir_src *
 450 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
 451 {
 452    switch (instr->intrinsic) {
 453    case nir_intrinsic_load_per_vertex_input:
 454    case nir_intrinsic_load_per_vertex_output:
 455       return &instr->src[0];
 456    case nir_intrinsic_store_per_vertex_output:
 457       return &instr->src[1];
 458    default:
 459       return NULL;
 460    }
 461 }