2 * Copyright © 2019 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
25 #include "nir_builder.h"
26 #include "nir_deref.h"
28 /** @file nir_lower_io_to_vector.c
30 * Merges compatible input/output variables residing in different components
31 * of the same location. It's expected that further passes such as
32 * nir_lower_io_to_temporaries will combine loads and stores of the merged
33 * variables, producing vector nir_load_input/nir_store_output instructions
34 * when all is said and done.
37 static const struct glsl_type
*
38 resize_array_vec_type(const struct glsl_type
*type
, unsigned num_components
)
40 if (glsl_type_is_array(type
)) {
41 const struct glsl_type
*arr_elem
=
42 resize_array_vec_type(glsl_get_array_element(type
), num_components
);
43 return glsl_array_type(arr_elem
, glsl_get_length(type
), 0);
45 assert(glsl_type_is_vector_or_scalar(type
));
46 return glsl_vector_type(glsl_get_base_type(type
), num_components
);
51 variable_can_rewrite(const nir_variable
*var
)
53 /* Only touch user defined varyings as these are the only ones we split */
54 if (var
->data
.location
< VARYING_SLOT_VAR0
)
57 /* Skip complex types we don't split in the first place */
58 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var
->type
)))
61 /* TODO: add 64/16bit support ? */
62 if (glsl_get_bit_size(glsl_without_array(var
->type
)) != 32)
69 variables_can_merge(nir_shader
*shader
,
70 const nir_variable
*a
, const nir_variable
*b
)
72 const struct glsl_type
*a_type_tail
= a
->type
;
73 const struct glsl_type
*b_type_tail
= b
->type
;
75 /* They must have the same array structure */
76 while (glsl_type_is_array(a_type_tail
)) {
77 if (!glsl_type_is_array(b_type_tail
))
80 if (glsl_get_length(a_type_tail
) != glsl_get_length(b_type_tail
))
83 a_type_tail
= glsl_get_array_element(a_type_tail
);
84 b_type_tail
= glsl_get_array_element(b_type_tail
);
87 if (!glsl_type_is_vector_or_scalar(a_type_tail
) ||
88 !glsl_type_is_vector_or_scalar(b_type_tail
))
91 if (glsl_get_base_type(a
->type
) != glsl_get_base_type(b
->type
))
94 assert(a
->data
.mode
== b
->data
.mode
);
95 if (shader
->info
.stage
== MESA_SHADER_FRAGMENT
&&
96 a
->data
.mode
== nir_var_shader_in
&&
97 a
->data
.interpolation
!= b
->data
.interpolation
)
104 create_new_io_vars(nir_shader
*shader
, struct exec_list
*io_list
,
105 nir_variable
*old_vars
[MAX_VARYINGS_INCL_PATCH
][4],
106 nir_variable
*new_vars
[MAX_VARYINGS_INCL_PATCH
][4])
108 if (exec_list_is_empty(io_list
))
111 nir_foreach_variable(var
, io_list
) {
112 if (variable_can_rewrite(var
)) {
113 unsigned loc
= var
->data
.location
- VARYING_SLOT_VAR0
;
114 unsigned frac
= var
->data
.location_frac
;
115 old_vars
[loc
][frac
] = var
;
119 bool merged_any_vars
= false;
121 /* We don't handle combining vars of different type e.g. different array
124 for (unsigned loc
= 0; loc
< MAX_VARYINGS_INCL_PATCH
; loc
++) {
127 nir_variable
*first_var
= old_vars
[loc
][frac
];
134 bool found_merge
= false;
137 nir_variable
*var
= old_vars
[loc
][frac
];
141 if (var
!= first_var
) {
142 if (!variables_can_merge(shader
, first_var
, var
))
148 const unsigned num_components
=
149 glsl_get_components(glsl_without_array(var
->type
));
151 /* We had better not have any overlapping vars */
152 for (unsigned i
= 1; i
< num_components
; i
++)
153 assert(old_vars
[loc
][frac
+ i
] == NULL
);
155 frac
+= num_components
;
161 merged_any_vars
= true;
163 nir_variable
*var
= nir_variable_clone(old_vars
[loc
][first
], shader
);
164 var
->data
.location_frac
= first
;
165 var
->type
= resize_array_vec_type(var
->type
, frac
- first
);
167 nir_shader_add_variable(shader
, var
);
168 for (unsigned i
= first
; i
< frac
; i
++)
169 new_vars
[loc
][i
] = var
;
173 return merged_any_vars
;
176 static nir_deref_instr
*
177 build_array_deref_of_new_var(nir_builder
*b
, nir_variable
*new_var
,
178 nir_deref_instr
*leader
)
180 if (leader
->deref_type
== nir_deref_type_var
)
181 return nir_build_deref_var(b
, new_var
);
183 nir_deref_instr
*parent
=
184 build_array_deref_of_new_var(b
, new_var
, nir_deref_instr_parent(leader
));
186 return nir_build_deref_follower(b
, parent
, leader
);
190 nir_lower_io_to_vector_impl(nir_function_impl
*impl
, nir_variable_mode modes
)
192 assert(!(modes
& ~(nir_var_shader_in
| nir_var_shader_out
)));
195 nir_builder_init(&b
, impl
);
197 nir_metadata_require(impl
, nir_metadata_dominance
);
199 nir_shader
*shader
= impl
->function
->shader
;
200 nir_variable
*old_inputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
201 nir_variable
*new_inputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
202 nir_variable
*old_outputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
203 nir_variable
*new_outputs
[MAX_VARYINGS_INCL_PATCH
][4] = {{0}};
205 if (modes
& nir_var_shader_in
) {
206 /* Vertex shaders support overlapping inputs. We don't do those */
207 assert(b
.shader
->info
.stage
!= MESA_SHADER_VERTEX
);
209 /* If we don't actually merge any variables, remove that bit from modes
210 * so we don't bother doing extra non-work.
212 if (!create_new_io_vars(shader
, &shader
->inputs
,
213 old_inputs
, new_inputs
))
214 modes
&= ~nir_var_shader_in
;
217 if (modes
& nir_var_shader_out
) {
218 /* Fragment shader outputs are always vec4. You shouldn't have
219 * scalarized them and it doesn't make sense to vectorize them.
221 assert(b
.shader
->info
.stage
!= MESA_SHADER_FRAGMENT
);
223 /* If we don't actually merge any variables, remove that bit from modes
224 * so we don't bother doing extra non-work.
226 if (!create_new_io_vars(shader
, &shader
->outputs
,
227 old_outputs
, new_outputs
))
228 modes
&= ~nir_var_shader_out
;
234 bool progress
= false;
236 /* Actually lower all the IO load/store intrinsics. Load instructions are
237 * lowered to a vector load and an ALU instruction to grab the channels we
238 * want. Outputs are lowered to a write-masked store of the vector output.
239 * For non-TCS outputs, we then run nir_lower_io_to_temporaries at the end
240 * to clean up the partial writes.
242 nir_foreach_block(block
, impl
) {
243 nir_foreach_instr_safe(instr
, block
) {
244 if (instr
->type
!= nir_instr_type_intrinsic
)
247 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
249 switch (intrin
->intrinsic
) {
250 case nir_intrinsic_load_deref
:
251 case nir_intrinsic_interp_deref_at_centroid
:
252 case nir_intrinsic_interp_deref_at_sample
:
253 case nir_intrinsic_interp_deref_at_offset
: {
254 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
255 if (!(old_deref
->mode
& modes
))
258 if (old_deref
->mode
== nir_var_shader_out
)
259 assert(b
.shader
->info
.stage
== MESA_SHADER_TESS_CTRL
);
261 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
262 if (old_var
->data
.location
< VARYING_SLOT_VAR0
)
265 const unsigned loc
= old_var
->data
.location
- VARYING_SLOT_VAR0
;
266 const unsigned old_frac
= old_var
->data
.location_frac
;
267 nir_variable
*new_var
= old_deref
->mode
== nir_var_shader_in
?
268 new_inputs
[loc
][old_frac
] :
269 new_outputs
[loc
][old_frac
];
273 assert(new_var
->data
.location
== VARYING_SLOT_VAR0
+ loc
);
274 const unsigned new_frac
= new_var
->data
.location_frac
;
276 nir_component_mask_t vec4_comp_mask
=
277 ((1 << intrin
->num_components
) - 1) << old_frac
;
279 b
.cursor
= nir_before_instr(&intrin
->instr
);
281 /* Rewrite the load to use the new variable and only select a
282 * portion of the result.
284 nir_deref_instr
*new_deref
=
285 build_array_deref_of_new_var(&b
, new_var
, old_deref
);
286 assert(glsl_type_is_vector(new_deref
->type
));
287 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
288 nir_src_for_ssa(&new_deref
->dest
.ssa
));
290 intrin
->num_components
=
291 glsl_get_components(new_deref
->type
);
292 intrin
->dest
.ssa
.num_components
= intrin
->num_components
;
294 b
.cursor
= nir_after_instr(&intrin
->instr
);
296 nir_ssa_def
*new_vec
= nir_channels(&b
, &intrin
->dest
.ssa
,
297 vec4_comp_mask
>> new_frac
);
298 nir_ssa_def_rewrite_uses_after(&intrin
->dest
.ssa
,
299 nir_src_for_ssa(new_vec
),
300 new_vec
->parent_instr
);
306 case nir_intrinsic_store_deref
: {
307 nir_deref_instr
*old_deref
= nir_src_as_deref(intrin
->src
[0]);
308 if (old_deref
->mode
!= nir_var_shader_out
)
311 nir_variable
*old_var
= nir_deref_instr_get_variable(old_deref
);
312 if (old_var
->data
.location
< VARYING_SLOT_VAR0
)
315 const unsigned loc
= old_var
->data
.location
- VARYING_SLOT_VAR0
;
316 const unsigned old_frac
= old_var
->data
.location_frac
;
317 nir_variable
*new_var
= new_outputs
[loc
][old_frac
];
321 assert(new_var
->data
.location
== VARYING_SLOT_VAR0
+ loc
);
322 const unsigned new_frac
= new_var
->data
.location_frac
;
324 b
.cursor
= nir_before_instr(&intrin
->instr
);
326 /* Rewrite the store to be a masked store to the new variable */
327 nir_deref_instr
*new_deref
=
328 build_array_deref_of_new_var(&b
, new_var
, old_deref
);
329 assert(glsl_type_is_vector(new_deref
->type
));
330 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
331 nir_src_for_ssa(&new_deref
->dest
.ssa
));
333 intrin
->num_components
=
334 glsl_get_components(new_deref
->type
);
336 nir_component_mask_t old_wrmask
= nir_intrinsic_write_mask(intrin
);
338 assert(intrin
->src
[1].is_ssa
);
339 nir_ssa_def
*old_value
= intrin
->src
[1].ssa
;
340 nir_ssa_def
*comps
[4];
341 for (unsigned c
= 0; c
< intrin
->num_components
; c
++) {
342 if (new_frac
+ c
>= old_frac
&&
343 (old_wrmask
& 1 << (new_frac
+ c
- old_frac
))) {
344 comps
[c
] = nir_channel(&b
, old_value
,
345 new_frac
+ c
- old_frac
);
347 comps
[c
] = nir_ssa_undef(&b
, old_value
->num_components
,
348 old_value
->bit_size
);
351 nir_ssa_def
*new_value
= nir_vec(&b
, comps
, intrin
->num_components
);
352 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[1],
353 nir_src_for_ssa(new_value
));
355 nir_intrinsic_set_write_mask(intrin
,
356 old_wrmask
<< (old_frac
- new_frac
));
369 nir_metadata_preserve(impl
, nir_metadata_block_index
|
370 nir_metadata_dominance
);
377 nir_lower_io_to_vector(nir_shader
*shader
, nir_variable_mode modes
)
379 bool progress
= false;
381 nir_foreach_function(function
, shader
) {
383 progress
|= nir_lower_io_to_vector_impl(function
->impl
, modes
);