src/intel/vulkan/anv_nir_lower_multiview.c
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "util/debug.h"

/**
 * This file implements the lowering required for VK_KHR_multiview.
 *
 * When possible, Primitive Replication is used and the shader is modified to
 * make gl_Position an array and fill it with values for each view.
 *
 * Otherwise we implement multiview using instanced rendering. The number of
 * instances in each draw call is multiplied by the number of views in the
 * subpass. Then, in the shader, we divide gl_InstanceID by the number of
 * views to recover the application's instance ID, and remap
 * gl_InstanceID % view_count to the actual ViewIndex.
 */
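
/* As an illustration, with view_mask = 0b0110 (views 1 and 2) and a draw of N
 * instances, the instance count becomes 2 * N and each gl_InstanceID
 * decomposes as:
 *
 *    real instance  = gl_InstanceID / 2  ->  0, 0, 1, 1, ...
 *    compacted view = gl_InstanceID % 2  ->  0, 1, 0, 1, ...
 *    actual ViewIndex (after remapping)  ->  1, 2, 1, 2, ...
 */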

struct lower_multiview_state {
   nir_builder builder;

   uint32_t view_mask;

   nir_ssa_def *instance_id;
   nir_ssa_def *view_index;
};

static nir_ssa_def *
build_instance_id(struct lower_multiview_state *state)
{
   assert(state->builder.shader->info.stage == MESA_SHADER_VERTEX);

   if (state->instance_id == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      /* We use instancing for implementing multiview. The actual instance id
       * is given by dividing instance_id by the number of views in this
       * subpass.
       */
      state->instance_id =
         nir_idiv(b, nir_load_instance_id(b),
                  nir_imm_int(b, util_bitcount(state->view_mask)));
   }

   return state->instance_id;
}

static nir_ssa_def *
build_view_index(struct lower_multiview_state *state)
{
   if (state->view_index == NULL) {
      nir_builder *b = &state->builder;

      b->cursor = nir_before_block(nir_start_block(b->impl));

      assert(state->view_mask != 0);
      if (util_bitcount(state->view_mask) == 1) {
         /* Set the view index directly. */
         state->view_index = nir_imm_int(b, ffs(state->view_mask) - 1);
      } else if (state->builder.shader->info.stage == MESA_SHADER_VERTEX) {
         /* We only support 16 views */
         assert((state->view_mask & 0xffff0000) == 0);

         /* We use instancing for implementing multiview. The compacted view
          * id is given by instance_id % view_count. We then have to convert
          * that to an actual view id.
          */
         nir_ssa_def *compacted =
            nir_umod(b, nir_load_instance_id(b),
                     nir_imm_int(b, util_bitcount(state->view_mask)));

         if (util_is_power_of_two_or_zero(state->view_mask + 1)) {
            /* If we have a full view mask, then compacted is what we want */
            state->view_index = compacted;
         } else {
            /* Now we define a map from compacted view index to the actual
             * view index that's based on the view_mask. The map is given by
             * 16 nibbles, each of which is a value from 0 to 15.
             */
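            /* For example, view_mask = 0b1010 (views 1 and 3) gives
             * remap = 0x31: compacted index 0 reads nibble 0 and yields view
             * 1, while compacted index 1 reads nibble 1 and yields view 3.
             */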
            uint64_t remap = 0;
            uint32_t bit, i = 0;
            for_each_bit(bit, state->view_mask) {
               assert(bit < 16);
               remap |= (uint64_t)bit << (i++ * 4);
            }

            nir_ssa_def *shift = nir_imul(b, compacted, nir_imm_int(b, 4));

            /* One of these days, when we have int64 everywhere, this will be
             * easier.
             */
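            /* The code below selects a nibble from the 64-bit remap using
             * only 32-bit operations. When remap fits in 32 bits the low
             * dword is enough; otherwise both halves are shifted and bcsel
             * picks the low one for shift < 32 and the high one (shifted by
             * shift - 32) for larger shifts.
             */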
            nir_ssa_def *shifted;
            if (remap <= UINT32_MAX) {
               shifted = nir_ushr(b, nir_imm_int(b, remap), shift);
            } else {
               nir_ssa_def *shifted_low =
                  nir_ushr(b, nir_imm_int(b, remap), shift);
               nir_ssa_def *shifted_high =
                  nir_ushr(b, nir_imm_int(b, remap >> 32),
                           nir_isub(b, shift, nir_imm_int(b, 32)));
               shifted = nir_bcsel(b, nir_ilt(b, shift, nir_imm_int(b, 32)),
                                   shifted_low, shifted_high);
            }
            state->view_index = nir_iand(b, shifted, nir_imm_int(b, 0xf));
         }
      } else {
         const struct glsl_type *type = glsl_int_type();
         if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
             b->shader->info.stage == MESA_SHADER_GEOMETRY)
            type = glsl_array_type(type, 1, 0);

         nir_variable *idx_var =
            nir_variable_create(b->shader, nir_var_shader_in,
                                type, "view index");
         idx_var->data.location = VARYING_SLOT_VIEW_INDEX;
         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
            idx_var->data.interpolation = INTERP_MODE_FLAT;

         nir_deref_instr *deref = nir_build_deref_var(b, idx_var);
         if (glsl_type_is_array(type))
            deref = nir_build_deref_array_imm(b, deref, 0);

         state->view_index = nir_load_deref(b, deref);
      }
   }

   return state->view_index;
}

/* Primitive Replication allows a shader to write different positions for each
 * view in the same execution. If only the position depends on the view, then
 * it is possible to use the feature instead of instancing to implement
 * multiview.
 */
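/* As an illustration (the names below are made up), a vertex shader that in
 * GLSL terms did
 *
 *    gl_Position = mvp[gl_ViewIndex] * in_pos;
 *
 * is conceptually rewritten into
 *
 *    for (uint i = 0; i < view_count; i++)
 *       gl_Position[i] = mvp[view_index[i]] * in_pos;
 *
 * with gl_Position promoted to a per-view array output.
 */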
static bool
lower_multiview_with_primitive_replication(nir_shader *shader,
                                            struct anv_graphics_pipeline *pipeline)
{
   if (shader->info.stage == MESA_SHADER_FRAGMENT)
      return false;

   assert(shader->info.stage == MESA_SHADER_VERTEX);

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   assert(view_count > 1 && view_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   /* Update position to refer to an array. */
   nir_variable *pos_var = NULL;
   nir_foreach_variable(var, &shader->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         assert(var->type == glsl_vec4_type());
         var->type = glsl_array_type(glsl_vec4_type(), view_count, 0);
         var->data.per_view = true;
         pos_var = var;
         break;
      }
   }

   assert(pos_var);

   nir_cf_list body;
   nir_cf_list_extract(&body, &entrypoint->body);

   nir_builder b;
   nir_builder_init(&b, entrypoint);
   b.cursor = nir_after_cf_list(&entrypoint->body);

   /* Fill the Layer ID with zero. Replication will use that as the base to
    * apply the RTAI (Render Target Array Index) offsets.
    */
   nir_variable *layer_id_out =
      nir_variable_create(shader, nir_var_shader_out,
                          glsl_int_type(), "layer ID");
   layer_id_out->data.location = VARYING_SLOT_LAYER;
   nir_store_var(&b, layer_id_out, nir_imm_zero(&b, 1, 32), 0x1);

   /* The loop index will go from 0 to view_count - 1. */
   nir_variable *loop_index_var =
      nir_local_variable_create(entrypoint, glsl_uint_type(), "loop_index");
   nir_deref_instr *loop_index_deref = nir_build_deref_var(&b, loop_index_var);
   nir_store_deref(&b, loop_index_deref, nir_imm_int(&b, 0), 1);

   /* Array of the view index values that are active in the loop. Note that
    * the loop index only matches the view index if there are no gaps in the
    * view_mask.
    */
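   /* For example, view_mask = 0b1010 gives view_index[] = { 1, 3 }, so loop
    * index 0 renders view 1 and loop index 1 renders view 3.
    */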
   nir_variable *view_index_var = nir_local_variable_create(
      entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0), "view_index");
   nir_deref_instr *view_index_deref = nir_build_deref_var(&b, view_index_var);
   {
      int array_position = 0;
      uint32_t view_index;
      for_each_bit(view_index, view_mask) {
         nir_store_deref(&b, nir_build_deref_array_imm(&b, view_index_deref, array_position),
                         nir_imm_int(&b, view_index), 1);
         array_position++;
      }
   }

   /* Create the equivalent of
    *
    *    while (true):
    *       if (loop_index >= view_count):
    *          break
    *
    *       view_index = active_indices[loop_index]
    *       pos_deref = &pos[loop_index]
    *
    *       # Placeholder for the body to be reinserted.
    *
    *       loop_index += 1
    *
    * Later both `view_index` and `pos_deref` will be used to rewrite the
    * original shader body.
    */

   nir_loop *loop = nir_push_loop(&b);

   nir_ssa_def *loop_index = nir_load_deref(&b, loop_index_deref);
   nir_ssa_def *cmp = nir_ige(&b, loop_index, nir_imm_int(&b, view_count));
   nir_if *loop_check = nir_push_if(&b, cmp);
   nir_jump(&b, nir_jump_break);
   nir_pop_if(&b, loop_check);

   nir_ssa_def *view_index =
      nir_load_deref(&b, nir_build_deref_array(&b, view_index_deref, loop_index));
   nir_deref_instr *pos_deref =
      nir_build_deref_array(&b, nir_build_deref_var(&b, pos_var), loop_index);

   nir_store_deref(&b, loop_index_deref, nir_iadd_imm(&b, loop_index, 1), 1);
   nir_pop_loop(&b, loop);

   /* Reinsert the body. */
   b.cursor = nir_after_instr(&pos_deref->instr);
   nir_cf_reinsert(&body, b.cursor);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_view_index: {
            assert(intrin->dest.is_ssa);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(view_index));
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_variable *var = nir_intrinsic_get_var(intrin, 0);
            if (var == pos_var) {
               nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);

               nir_instr_rewrite_src(instr, &intrin->src[0],
                                     nir_src_for_ssa(&pos_deref->dest.ssa));

               /* Remove old deref since it has the wrong type. */
               nir_deref_instr_remove_if_unused(old_deref);
            }
            break;
         }

         case nir_intrinsic_load_deref:
            if (nir_intrinsic_get_var(intrin, 0) == pos_var) {
               unreachable("Should have lowered I/O to temporaries "
                           "so no load_deref on position output is expected.");
            }
            break;

         case nir_intrinsic_copy_deref:
            unreachable("Should have lowered copy_derefs at this point");
            break;

         default:
            /* Do nothing. */
            break;
         }
      }
   }

   nir_metadata_preserve(entrypoint, nir_metadata_none);
   return true;
}

bool
anv_nir_lower_multiview(nir_shader *shader,
                        struct anv_graphics_pipeline *pipeline)
{
   assert(shader->info.stage != MESA_SHADER_COMPUTE);
   uint32_t view_mask = pipeline->subpass->view_mask;

   /* If multiview isn't enabled, we have nothing to do. */
   if (view_mask == 0)
      return false;

   if (pipeline->use_primitive_replication)
      return lower_multiview_with_primitive_replication(shader, pipeline);

   struct lower_multiview_state state = {
      .view_mask = view_mask,
   };

   /* This pass assumes a single entrypoint */
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_builder_init(&state.builder, entrypoint);

   bool progress = false;
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

         if (load->intrinsic != nir_intrinsic_load_instance_id &&
             load->intrinsic != nir_intrinsic_load_view_index)
            continue;

         assert(load->dest.is_ssa);

         nir_ssa_def *value;
         if (load->intrinsic == nir_intrinsic_load_instance_id) {
            value = build_instance_id(&state);
         } else {
            assert(load->intrinsic == nir_intrinsic_load_view_index);
            value = build_view_index(&state);
         }

         nir_ssa_def_rewrite_uses(&load->dest.ssa, nir_src_for_ssa(value));

         nir_instr_remove(&load->instr);
         progress = true;
      }
   }

   /* The view index is available in all stages but the instance id is only
    * available in the VS. If it's not a fragment shader, we need to pass
    * the view index on to the next stage.
    */
   if (shader->info.stage != MESA_SHADER_FRAGMENT) {
      nir_ssa_def *view_index = build_view_index(&state);

      nir_builder *b = &state.builder;

      assert(view_index->parent_instr->block == nir_start_block(entrypoint));
      b->cursor = nir_after_instr(view_index->parent_instr);

      /* Unless there is only one possible view index (that would be set
       * directly), pass it to the next stage.
       */
      if (util_bitcount(state.view_mask) != 1) {
         nir_variable *view_index_out =
            nir_variable_create(shader, nir_var_shader_out,
                                glsl_int_type(), "view index");
         view_index_out->data.location = VARYING_SLOT_VIEW_INDEX;
         nir_store_var(b, view_index_out, view_index, 0x1);
      }

      nir_variable *layer_id_out =
         nir_variable_create(shader, nir_var_shader_out,
                             glsl_int_type(), "layer ID");
      layer_id_out->data.location = VARYING_SLOT_LAYER;
      nir_store_var(b, layer_id_out, view_index, 0x1);

      progress = true;
   }

   if (progress) {
      nir_metadata_preserve(entrypoint, nir_metadata_block_index |
                                        nir_metadata_dominance);
   }

   return progress;
}

static bool
shader_writes_to_memory(nir_shader *shader)
{
   /* With multiview, we would need to ensure that memory writes happen either
    * once or once per view. Since the combination of multiview and memory
    * writes is not expected, we just skip this optimization in that case.
    */

   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_deref_atomic_add:
         case nir_intrinsic_deref_atomic_imin:
         case nir_intrinsic_deref_atomic_umin:
         case nir_intrinsic_deref_atomic_imax:
         case nir_intrinsic_deref_atomic_umax:
         case nir_intrinsic_deref_atomic_and:
         case nir_intrinsic_deref_atomic_or:
         case nir_intrinsic_deref_atomic_xor:
         case nir_intrinsic_deref_atomic_exchange:
         case nir_intrinsic_deref_atomic_comp_swap:
         case nir_intrinsic_store_ssbo:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_store_shared:
         case nir_intrinsic_shared_atomic_add:
         case nir_intrinsic_shared_atomic_imin:
         case nir_intrinsic_shared_atomic_umin:
         case nir_intrinsic_shared_atomic_imax:
         case nir_intrinsic_shared_atomic_umax:
         case nir_intrinsic_shared_atomic_and:
         case nir_intrinsic_shared_atomic_or:
         case nir_intrinsic_shared_atomic_xor:
         case nir_intrinsic_shared_atomic_exchange:
         case nir_intrinsic_shared_atomic_comp_swap:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
            return true;

         default:
            /* Keep walking. */
            break;
         }
      }
   }

   return false;
}

static bool
shader_uses_view_index(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);

   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_load_view_index)
            return true;
      }
   }

   return false;
}

static bool
shader_only_position_uses_view_index(nir_shader *shader)
{
   nir_shader *shader_no_position = nir_shader_clone(NULL, shader);
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader_no_position);

   /* Remove the position stores from the cloned shader. */
   nir_foreach_block(block, entrypoint) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
         if (store->intrinsic != nir_intrinsic_store_deref)
            continue;

         nir_variable *var = nir_intrinsic_get_var(store, 0);
         if (var->data.location != VARYING_SLOT_POS)
            continue;

         nir_instr_remove(&store->instr);
      }
   }

   /* Clean up the shader so unused load_view_index intrinsics are removed. */
   bool progress;
   do {
      progress = false;
      progress |= nir_opt_dead_cf(shader_no_position);

      /* Peephole select will drop if-blocks whose then and else branches are
       * empty, which removes the use of the SSA value in the condition.
       */
      progress |= nir_opt_peephole_select(shader_no_position, 0, false, false);

      progress |= nir_opt_dce(shader_no_position);
   } while (progress);

   bool uses_view_index = shader_uses_view_index(shader_no_position);

   ralloc_free(shader_no_position);
   return !uses_view_index;
}

bool
anv_check_for_primitive_replication(nir_shader **shaders,
                                    struct anv_graphics_pipeline *pipeline)
{
   assert(pipeline->base.device->info.gen >= 12);

   static int primitive_replication_max_views = -1;
   if (primitive_replication_max_views < 0) {
      /* TODO: Figure out why we are not getting the same benefits for more
       * than 2 views. For now use Primitive Replication just for the 2-view
       * case by default.
       */
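      /* The default can be overridden at run time; for example, launching an
       * application as
       *
       *    ANV_PRIMITIVE_REPLICATION_MAX_VIEWS=4 ./app
       *
       * raises the limit to 4 views (still clamped to
       * MAX_VIEWS_FOR_PRIMITIVE_REPLICATION below).
       */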
      const unsigned default_max_views = 2;

      primitive_replication_max_views =
         MIN2(MAX_VIEWS_FOR_PRIMITIVE_REPLICATION,
              env_var_as_unsigned("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS",
                                  default_max_views));
   }

   /* TODO: We should be able to support replication at 'geometry' stages
    * later than Vertex. In that case only the last stage can refer to
    * gl_ViewIndex.
    */
   if (pipeline->active_stages != (VK_SHADER_STAGE_VERTEX_BIT |
                                   VK_SHADER_STAGE_FRAGMENT_BIT)) {
      return false;
   }

   uint32_t view_mask = pipeline->subpass->view_mask;
   int view_count = util_bitcount(view_mask);
   if (view_count == 1 || view_count > primitive_replication_max_views)
      return false;

   bool vs_writes_position = false;
   nir_foreach_variable(var, &shaders[MESA_SHADER_VERTEX]->outputs) {
      if (var->data.location == VARYING_SLOT_POS) {
         vs_writes_position = true;
         break;
      }
   }

   /* Don't bother handling this edge case with Primitive Replication. */
   if (!vs_writes_position)
      return false;

   return !shader_uses_view_index(shaders[MESA_SHADER_FRAGMENT]) &&
          !shader_writes_to_memory(shaders[MESA_SHADER_VERTEX]) &&
          shader_only_position_uses_view_index(shaders[MESA_SHADER_VERTEX]);
}