1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
28 #include "util/bitscan.h"
29 #include "util/u_dynarray.h"
30
31 /**
32 * Variable-based copy propagation
33 *
34 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
35 * However, there are cases, especially when dealing with indirects, where SSA
36 * won't help you. This pass is for those times. Specifically, it handles
37 * the following things that the rest of NIR can't:
38 *
39 * 1) Copy-propagation on variables that have indirect access. This includes
40 * propagating from indirect stores into indirect loads.
41 *
42 * 2) Removal of redundant load_deref intrinsics. We can't trust regular CSE
43 * to do this because it isn't aware of variable writes that may alias the
44 * value and make the former load invalid.
45 *
 46  * This pass uses an intermediate approach between a purely local /
 47  * "per-block" analysis and a complete data-flow analysis.  It follows the
 48  * control flow graph, propagates the available copy information forward,
 49  * and invalidates that information at each cf_node.
50 *
51 * Removal of dead writes to variables is handled by another pass.
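 52  *
 53  * As a rough sketch (pseudocode, not exact NIR syntax) of the kind of
 54  * rewrite this pass performs:
 55  *
 56  *    store_deref(a, ssa_1)
 57  *    ssa_2 = load_deref(a)    <- uses of ssa_2 are rewritten to ssa_1
 58  *    copy_deref(b, a)         <- becomes a store of ssa_1 to b
 59  *    ssa_3 = load_deref(b)    <- uses of ssa_3 are rewritten to ssa_1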
52 */
53
54 struct vars_written {
55 nir_variable_mode modes;
56
 57    /* Key is the deref and value is a uintptr_t holding the write mask. */
58 struct hash_table *derefs;
59 };
60
61 struct value {
62 bool is_ssa;
63 union {
64 nir_ssa_def *ssa[4];
65 nir_deref_instr *deref;
66 };
67 };
68
69 struct copy_entry {
70 struct value src;
71
72 nir_deref_instr *dst;
73 };
74
75 struct copy_prop_var_state {
76 nir_function_impl *impl;
77
78 void *mem_ctx;
79 void *lin_ctx;
80
 81    /* Maps each if and loop node to its vars_written.  Used to invalidate
 82     * copy entries when visiting each node.
83 */
84 struct hash_table *vars_written_map;
85
86 bool progress;
87 };
88
89 static struct vars_written *
90 create_vars_written(struct copy_prop_var_state *state)
91 {
92 struct vars_written *written =
93 linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
94 written->derefs = _mesa_hash_table_create(state->mem_ctx, _mesa_hash_pointer,
95 _mesa_key_pointer_equal);
96 return written;
97 }
98
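/* Recursively gather, for each if and loop node, the variable modes and the
 * derefs that may be written anywhere inside it.  The result is merged into
 * the parent's vars_written (when there is one) and recorded in
 * state->vars_written_map so copies can later be invalidated when visiting
 * that node.
 */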
99 static void
100 gather_vars_written(struct copy_prop_var_state *state,
101 struct vars_written *written,
102 nir_cf_node *cf_node)
103 {
104 struct vars_written *new_written = NULL;
105
106 switch (cf_node->type) {
107 case nir_cf_node_function: {
108 nir_function_impl *impl = nir_cf_node_as_function(cf_node);
109 foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
110 gather_vars_written(state, NULL, cf_node);
111 break;
112 }
113
114 case nir_cf_node_block: {
115 if (!written)
116 break;
117
118 nir_block *block = nir_cf_node_as_block(cf_node);
119 nir_foreach_instr(instr, block) {
120 if (instr->type == nir_instr_type_call) {
121 written->modes |= nir_var_shader_out |
122 nir_var_global |
123 nir_var_local |
124 nir_var_shader_storage |
125 nir_var_shared;
126 continue;
127 }
128
129 if (instr->type != nir_instr_type_intrinsic)
130 continue;
131
132 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
133 switch (intrin->intrinsic) {
134 case nir_intrinsic_barrier:
135 case nir_intrinsic_memory_barrier:
136 written->modes |= nir_var_shader_out |
137 nir_var_shader_storage |
138 nir_var_shared;
139 break;
140
141 case nir_intrinsic_emit_vertex:
142 case nir_intrinsic_emit_vertex_with_counter:
143 written->modes = nir_var_shader_out;
144 break;
145
146 case nir_intrinsic_store_deref:
147 case nir_intrinsic_copy_deref: {
148 /* Destination in _both_ store_deref and copy_deref is src[0]. */
149 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
150
151 uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ?
152 nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;
153
154 struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
155 if (ht_entry)
156 ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
157 else
158 _mesa_hash_table_insert(written->derefs, dst, (void *)mask);
159
160 break;
161 }
162
163 default:
164 break;
165 }
166 }
167
168 break;
169 }
170
171 case nir_cf_node_if: {
172 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
173
174 new_written = create_vars_written(state);
175
176 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
177 gather_vars_written(state, new_written, cf_node);
178
179 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
180 gather_vars_written(state, new_written, cf_node);
181
182 break;
183 }
184
185 case nir_cf_node_loop: {
186 nir_loop *loop = nir_cf_node_as_loop(cf_node);
187
188 new_written = create_vars_written(state);
189
190 foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
191 gather_vars_written(state, new_written, cf_node);
192
193 break;
194 }
195
196 default:
197 unreachable("Invalid CF node type");
198 }
199
200 if (new_written) {
201       /* Merge the new information into the parent control flow node. */
202 if (written) {
203 written->modes |= new_written->modes;
204 hash_table_foreach(new_written->derefs, new_entry) {
205 struct hash_entry *old_entry =
206 _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
207 new_entry->key);
208 if (old_entry) {
209 nir_component_mask_t merged = (uintptr_t) new_entry->data |
210 (uintptr_t) old_entry->data;
211 old_entry->data = (void *) ((uintptr_t) merged);
212 } else {
213 _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
214 new_entry->key, new_entry->data);
215 }
216 }
217 }
218 _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
219 }
220 }
221
222 static struct copy_entry *
223 copy_entry_create(struct util_dynarray *copies,
224 nir_deref_instr *dst_deref)
225 {
226 struct copy_entry new_entry = {
227 .dst = dst_deref,
228 };
229 util_dynarray_append(copies, struct copy_entry, new_entry);
230 return util_dynarray_top_ptr(copies, struct copy_entry);
231 }
232
233 /* Remove a copy entry by swapping it with the last element and reducing the
234 * size. If used inside an iteration on copies, it must be a reverse
235 * (backwards) iteration. It is safe to use in those cases because the swap
236 * will not affect the rest of the iteration.
237 */
238 static void
239 copy_entry_remove(struct util_dynarray *copies,
240 struct copy_entry *entry)
241 {
242    /* This also works when removing the last element since pop doesn't shrink
243 * the memory used by the array, so the swap is useless but not invalid.
244 */
245 *entry = util_dynarray_pop(copies, struct copy_entry);
246 }
247
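/* Find the first entry whose destination compares to the given deref with
 * one of the allowed comparison bits (e.g. equal or contains), or NULL if
 * there is none.
 */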
248 static struct copy_entry *
249 lookup_entry_for_deref(struct util_dynarray *copies,
250 nir_deref_instr *deref,
251 nir_deref_compare_result allowed_comparisons)
252 {
253 util_dynarray_foreach(copies, struct copy_entry, iter) {
254 if (nir_compare_derefs(iter->dst, deref) & allowed_comparisons)
255 return iter;
256 }
257
258 return NULL;
259 }
260
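/* Walk the copies backwards, removing every entry whose source aliases the
 * deref being written and every entry whose destination aliases it without
 * being exactly equal.  The entry whose destination is exactly this deref
 * (if any) is kept and returned so the caller can update it with the new
 * value.
 */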
261 static struct copy_entry *
262 lookup_entry_and_kill_aliases(struct util_dynarray *copies,
263 nir_deref_instr *deref,
264 unsigned write_mask)
265 {
266 /* TODO: Take into account the write_mask. */
267
268 struct copy_entry *entry = NULL;
269 util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
270 if (!iter->src.is_ssa) {
271 /* If this write aliases the source of some entry, get rid of it */
272 if (nir_compare_derefs(iter->src.deref, deref) & nir_derefs_may_alias_bit) {
273 copy_entry_remove(copies, iter);
274 continue;
275 }
276 }
277
278 nir_deref_compare_result comp = nir_compare_derefs(iter->dst, deref);
279
280 if (comp & nir_derefs_equal_bit) {
281 assert(entry == NULL);
282 entry = iter;
283 } else if (comp & nir_derefs_may_alias_bit) {
284 copy_entry_remove(copies, iter);
285 }
286 }
287
288 return entry;
289 }
290
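/* Invalidate every entry affected by a write to this deref, including the
 * entry for the deref itself.
 */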
291 static void
292 kill_aliases(struct util_dynarray *copies,
293 nir_deref_instr *deref,
294 unsigned write_mask)
295 {
296 /* TODO: Take into account the write_mask. */
297
298 struct copy_entry *entry =
299 lookup_entry_and_kill_aliases(copies, deref, write_mask);
300 if (entry)
301 copy_entry_remove(copies, entry);
302 }
303
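/* Like kill_aliases(), but keep (or create) the entry for this exact deref
 * so the caller can store the newly written value into it.
 */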
304 static struct copy_entry *
305 get_entry_and_kill_aliases(struct util_dynarray *copies,
306 nir_deref_instr *deref,
307 unsigned write_mask)
308 {
309 /* TODO: Take into account the write_mask. */
310
311 struct copy_entry *entry =
312 lookup_entry_and_kill_aliases(copies, deref, write_mask);
313
314 if (entry == NULL)
315 entry = copy_entry_create(copies, deref);
316
317 return entry;
318 }
319
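/* Remove every entry whose destination variable, or source variable when the
 * source is a deref, lives in one of the given modes.
 */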
320 static void
321 apply_barrier_for_modes(struct util_dynarray *copies,
322 nir_variable_mode modes)
323 {
324 util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
325 nir_variable *dst_var = nir_deref_instr_get_variable(iter->dst);
326 nir_variable *src_var = iter->src.is_ssa ? NULL :
327 nir_deref_instr_get_variable(iter->src.deref);
328
329 if ((dst_var->data.mode & modes) ||
330 (src_var && (src_var->data.mode & modes)))
331 copy_entry_remove(copies, iter);
332 }
333 }
334
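/* Record a new value in an entry.  SSA values only overwrite the components
 * selected by write_mask; a deref source (from a copy_deref) always replaces
 * the whole entry.
 */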
335 static void
336 store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
337 const struct value *value, unsigned write_mask)
338 {
339 if (value->is_ssa) {
340 entry->src.is_ssa = true;
341 /* Only overwrite the written components */
342 for (unsigned i = 0; i < 4; i++) {
343 if (write_mask & (1 << i))
344 entry->src.ssa[i] = value->ssa[i];
345 }
346 } else {
347 /* Non-ssa stores always write everything */
348 entry->src.is_ssa = false;
349 entry->src.deref = value->deref;
350 }
351 }
352
353 /* Do a "load" from an SSA-based entry and return it in "value" as a value with a
354 * single SSA def. Because an entry could reference up to 4 different SSA
355 * defs, a vecN operation may be inserted to combine them into a single SSA
356 * def before handing it back to the caller. If the load instruction is no
357 * longer needed, it is removed and nir_instr::block is set to NULL. (It is
358 * possible, in some cases, for the load to be used in the vecN operation in
359 * which case it isn't deleted.)
360 */
361 static bool
362 load_from_ssa_entry_value(struct copy_prop_var_state *state,
363 struct copy_entry *entry,
364 nir_builder *b, nir_intrinsic_instr *intrin,
365 struct value *value)
366 {
367 *value = entry->src;
368 assert(value->is_ssa);
369
370 const struct glsl_type *type = entry->dst->type;
371 unsigned num_components = glsl_get_vector_elements(type);
372
373 nir_component_mask_t available = 0;
374 bool all_same = true;
375 for (unsigned i = 0; i < num_components; i++) {
376 if (value->ssa[i])
377 available |= (1 << i);
378
379 if (value->ssa[i] != value->ssa[0])
380 all_same = false;
381 }
382
383 if (all_same) {
384 /* Our work here is done */
385 b->cursor = nir_instr_remove(&intrin->instr);
386 intrin->instr.block = NULL;
387 return true;
388 }
389
390 if (available != (1 << num_components) - 1 &&
391 intrin->intrinsic == nir_intrinsic_load_deref &&
392 (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
393 /* If none of the components read are available as SSA values, then we
394 * should just bail. Otherwise, we would end up replacing the uses of
395        * the load_deref with a vecN() that just gathers up its components.
396 */
397 return false;
398 }
399
400 b->cursor = nir_after_instr(&intrin->instr);
401
402 nir_ssa_def *load_def =
403 intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->dest.ssa : NULL;
404
405 bool keep_intrin = false;
406 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
407 for (unsigned i = 0; i < num_components; i++) {
408 if (value->ssa[i]) {
409 comps[i] = nir_channel(b, value->ssa[i], i);
410 } else {
411 /* We don't have anything for this component in our
412 * list. Just re-use a channel from the load.
413 */
414 if (load_def == NULL)
415 load_def = nir_load_deref(b, entry->dst);
416
417 if (load_def->parent_instr == &intrin->instr)
418 keep_intrin = true;
419
420 comps[i] = nir_channel(b, load_def, i);
421 }
422 }
423
424 nir_ssa_def *vec = nir_vec(b, comps, num_components);
425 for (unsigned i = 0; i < num_components; i++)
426 value->ssa[i] = vec;
427
428 if (!keep_intrin) {
429 /* Removing this instruction should not touch the cursor because we
430 * created the cursor after the intrinsic and have added at least one
431 * instruction (the vec) since then.
432 */
433 assert(b->cursor.instr != &intrin->instr);
434 nir_instr_remove(&intrin->instr);
435 intrin->instr.block = NULL;
436 }
437
438 return true;
439 }
440
441 /**
442 * Specialize the wildcards in a deref chain
443 *
444 * This function returns a deref chain identical to \param deref except that
445 * some of its wildcards are replaced with indices from \param specific. The
446 * process is guided by \param guide which references the same type as \param
447 * specific but has the same wildcard array lengths as \param deref.
448 */
449 static nir_deref_instr *
450 specialize_wildcards(nir_builder *b,
451 nir_deref_path *deref,
452 nir_deref_path *guide,
453 nir_deref_path *specific)
454 {
455 nir_deref_instr **deref_p = &deref->path[1];
456 nir_deref_instr **guide_p = &guide->path[1];
457 nir_deref_instr **spec_p = &specific->path[1];
458 nir_deref_instr *ret_tail = deref->path[0];
459 for (; *deref_p; deref_p++) {
460 if ((*deref_p)->deref_type == nir_deref_type_array_wildcard) {
461 /* This is where things get tricky. We have to search through
462 * the entry deref to find its corresponding wildcard and fill
463 * this slot in with the value from the src.
464 */
465 while (*guide_p &&
466 (*guide_p)->deref_type != nir_deref_type_array_wildcard) {
467 guide_p++;
468 spec_p++;
469 }
470 assert(*guide_p && *spec_p);
471
472 ret_tail = nir_build_deref_follower(b, ret_tail, *spec_p);
473
474 guide_p++;
475 spec_p++;
476 } else {
477 ret_tail = nir_build_deref_follower(b, ret_tail, *deref_p);
478 }
479 }
480
481 return ret_tail;
482 }
483
484 /* Do a "load" from a deref-based entry and return it in "value" as a value.  The
485 * deref returned in "value" will always be a fresh copy so the caller can
486 * steal it and assign it to the instruction directly without copying it
487 * again.
488 */
489 static bool
490 load_from_deref_entry_value(struct copy_prop_var_state *state,
491 struct copy_entry *entry,
492 nir_builder *b, nir_intrinsic_instr *intrin,
493 nir_deref_instr *src, struct value *value)
494 {
495 *value = entry->src;
496
497 b->cursor = nir_instr_remove(&intrin->instr);
498
499 nir_deref_path entry_dst_path, src_path;
500 nir_deref_path_init(&entry_dst_path, entry->dst, state->mem_ctx);
501 nir_deref_path_init(&src_path, src, state->mem_ctx);
502
503 bool need_to_specialize_wildcards = false;
504 nir_deref_instr **entry_p = &entry_dst_path.path[1];
505 nir_deref_instr **src_p = &src_path.path[1];
506 while (*entry_p && *src_p) {
507 nir_deref_instr *entry_tail = *entry_p++;
508 nir_deref_instr *src_tail = *src_p++;
509
510 if (src_tail->deref_type == nir_deref_type_array &&
511 entry_tail->deref_type == nir_deref_type_array_wildcard)
512 need_to_specialize_wildcards = true;
513 }
514
515 /* If the entry deref is longer than the source deref then it refers to a
516 * smaller type and we can't source from it.
517 */
518 assert(*entry_p == NULL);
519
520 if (need_to_specialize_wildcards) {
521 /* The entry has some wildcards that are not in src. This means we need
522 * to construct a new deref based on the entry but using the wildcards
523 * from the source and guided by the entry dst. Oof.
524 */
525 nir_deref_path entry_src_path;
526 nir_deref_path_init(&entry_src_path, entry->src.deref, state->mem_ctx);
527 value->deref = specialize_wildcards(b, &entry_src_path,
528 &entry_dst_path, &src_path);
529 nir_deref_path_finish(&entry_src_path);
530 }
531
532 /* If our source deref is longer than the entry deref, that's ok because
533 * it just means the entry deref needs to be extended a bit.
534 */
535 while (*src_p) {
536 nir_deref_instr *src_tail = *src_p++;
537 value->deref = nir_build_deref_follower(b, value->deref, src_tail);
538 }
539
540 nir_deref_path_finish(&entry_dst_path);
541 nir_deref_path_finish(&src_path);
542
543 return true;
544 }
545
546 static bool
547 try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
548 nir_builder *b, nir_intrinsic_instr *intrin,
549 nir_deref_instr *src, struct value *value)
550 {
551 if (entry == NULL)
552 return false;
553
554 if (entry->src.is_ssa) {
555 return load_from_ssa_entry_value(state, entry, b, intrin, value);
556 } else {
557 return load_from_deref_entry_value(state, entry, b, intrin, src, value);
558 }
559 }
560
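/* Use the vars_written information gathered for this CF node to drop any
 * copy entries that the node may overwrite: first by variable mode, then by
 * individual deref.
 */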
561 static void
562 invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
563 struct util_dynarray *copies,
564 nir_cf_node *cf_node)
565 {
566 struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
567 assert(ht_entry);
568
569 struct vars_written *written = ht_entry->data;
570 if (written->modes) {
571 util_dynarray_foreach_reverse(copies, struct copy_entry, entry) {
572 if (entry->dst->mode & written->modes)
573 copy_entry_remove(copies, entry);
574 }
575 }
576
577 hash_table_foreach (written->derefs, entry) {
578 nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
579 kill_aliases(copies, deref_written, (uintptr_t)entry->data);
580 }
581 }
582
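/* Walk the instructions of a block, rewriting loads and copies that can be
 * satisfied from the available copies and updating that set as stores,
 * copies, calls and barriers are encountered.
 */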
583 static void
584 copy_prop_vars_block(struct copy_prop_var_state *state,
585 nir_builder *b, nir_block *block,
586 struct util_dynarray *copies)
587 {
588 nir_foreach_instr_safe(instr, block) {
589 if (instr->type == nir_instr_type_call) {
590 apply_barrier_for_modes(copies, nir_var_shader_out |
591 nir_var_global |
592 nir_var_local |
593 nir_var_shader_storage |
594 nir_var_shared);
595 continue;
596 }
597
598 if (instr->type != nir_instr_type_intrinsic)
599 continue;
600
601 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
602 switch (intrin->intrinsic) {
603 case nir_intrinsic_barrier:
604 case nir_intrinsic_memory_barrier:
605 apply_barrier_for_modes(copies, nir_var_shader_out |
606 nir_var_shader_storage |
607 nir_var_shared);
608 break;
609
610 case nir_intrinsic_emit_vertex:
611 case nir_intrinsic_emit_vertex_with_counter:
612 apply_barrier_for_modes(copies, nir_var_shader_out);
613 break;
614
615 case nir_intrinsic_load_deref: {
616 nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
617
618 struct copy_entry *src_entry =
619 lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
620 struct value value;
621 if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
622 if (value.is_ssa) {
623             /* try_load_from_entry() has already ensured that we get a single SSA
624 * value that has all of the channels. We just have to do the
625 * rewrite operation.
626 */
627 if (intrin->instr.block) {
628 /* The lookup left our instruction in-place. This means it
629 * must have used it to vec up a bunch of different sources.
630 * We need to be careful when rewriting uses so we don't
631 * rewrite the vecN itself.
632 */
633 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
634 nir_src_for_ssa(value.ssa[0]),
635 value.ssa[0]->parent_instr);
636 } else {
637 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
638 nir_src_for_ssa(value.ssa[0]));
639 }
640 } else {
641 /* We're turning it into a load of a different variable */
642 intrin->src[0] = nir_src_for_ssa(&value.deref->dest.ssa);
643
644 /* Put it back in again. */
645 nir_builder_instr_insert(b, instr);
646
647 value.is_ssa = true;
648 for (unsigned i = 0; i < intrin->num_components; i++)
649 value.ssa[i] = &intrin->dest.ssa;
650 }
651 state->progress = true;
652 } else {
653 value.is_ssa = true;
654 for (unsigned i = 0; i < intrin->num_components; i++)
655 value.ssa[i] = &intrin->dest.ssa;
656 }
657
658 /* Now that we have a value, we're going to store it back so that we
659 * have the right value next time we come looking for it. In order
660 * to do this, we need an exact match, not just something that
661 * contains what we're looking for.
662 */
663 struct copy_entry *store_entry =
664 lookup_entry_for_deref(copies, src, nir_derefs_equal_bit);
665 if (!store_entry)
666 store_entry = copy_entry_create(copies, src);
667
668 /* Set up a store to this entry with the value of the load. This way
669 * we can potentially remove subsequent loads. However, we use a
670 * NULL instruction so we don't try and delete the load on a
671 * subsequent store.
672 */
673 store_to_entry(state, store_entry, &value,
674 ((1 << intrin->num_components) - 1));
675 break;
676 }
677
678 case nir_intrinsic_store_deref: {
679 struct value value = {
680 .is_ssa = true
681 };
682
683 for (unsigned i = 0; i < intrin->num_components; i++)
684 value.ssa[i] = intrin->src[1].ssa;
685
686 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
687 unsigned wrmask = nir_intrinsic_write_mask(intrin);
688 struct copy_entry *entry =
689 get_entry_and_kill_aliases(copies, dst, wrmask);
690 store_to_entry(state, entry, &value, wrmask);
691 break;
692 }
693
694 case nir_intrinsic_copy_deref: {
695 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
696 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
697
698 if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
699 /* This is a no-op self-copy. Get rid of it */
700 nir_instr_remove(instr);
701 continue;
702 }
703
704 struct copy_entry *src_entry =
705 lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
706 struct value value;
707 if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
708 if (value.is_ssa) {
709 nir_store_deref(b, dst, value.ssa[0], 0xf);
710 intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
711 } else {
712 /* If this would be a no-op self-copy, don't bother. */
713 if (nir_compare_derefs(value.deref, dst) & nir_derefs_equal_bit)
714 continue;
715
716 /* Just turn it into a copy of a different deref */
717 intrin->src[1] = nir_src_for_ssa(&value.deref->dest.ssa);
718
719 /* Put it back in again. */
720 nir_builder_instr_insert(b, instr);
721 }
722
723 state->progress = true;
724 } else {
725 value = (struct value) {
726 .is_ssa = false,
727 { .deref = src },
728 };
729 }
730
731 struct copy_entry *dst_entry =
732 get_entry_and_kill_aliases(copies, dst, 0xf);
733 store_to_entry(state, dst_entry, &value, 0xf);
734 break;
735 }
736
737 default:
738 break;
739 }
740 }
741 }
742
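/* Propagate copies through a CF node.  The two branches of an if each get
 * their own clone of the incoming copies; a loop invalidates everything its
 * body may write before processing the body with a clone.
 */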
743 static void
744 copy_prop_vars_cf_node(struct copy_prop_var_state *state,
745 struct util_dynarray *copies,
746 nir_cf_node *cf_node)
747 {
748 switch (cf_node->type) {
749 case nir_cf_node_function: {
750 nir_function_impl *impl = nir_cf_node_as_function(cf_node);
751
752 struct util_dynarray impl_copies;
753 util_dynarray_init(&impl_copies, state->mem_ctx);
754
755 foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
756 copy_prop_vars_cf_node(state, &impl_copies, cf_node);
757
758 break;
759 }
760
761 case nir_cf_node_block: {
762 nir_block *block = nir_cf_node_as_block(cf_node);
763 nir_builder b;
764 nir_builder_init(&b, state->impl);
765 copy_prop_vars_block(state, &b, block, copies);
766 break;
767 }
768
769 case nir_cf_node_if: {
770 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
771
772 /* Clone the copies for each branch of the if statement. The idea is
773 * that they both see the same state of available copies, but do not
774        * interfere with each other.
775 */
776
777 struct util_dynarray then_copies;
778 util_dynarray_clone(&then_copies, state->mem_ctx, copies);
779
780 struct util_dynarray else_copies;
781 util_dynarray_clone(&else_copies, state->mem_ctx, copies);
782
783 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
784 copy_prop_vars_cf_node(state, &then_copies, cf_node);
785
786 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
787 copy_prop_vars_cf_node(state, &else_copies, cf_node);
788
789       /* Both branches' copies can be ignored, since the effect of running both
790 * branches was captured in the first pass that collects vars_written.
791 */
792
793 invalidate_copies_for_cf_node(state, copies, cf_node);
794
795 break;
796 }
797
798 case nir_cf_node_loop: {
799 nir_loop *loop = nir_cf_node_as_loop(cf_node);
800
801 /* Invalidate before cloning the copies for the loop, since the loop
802 * body can be executed more than once.
803 */
804
805 invalidate_copies_for_cf_node(state, copies, cf_node);
806
807 struct util_dynarray loop_copies;
808 util_dynarray_clone(&loop_copies, state->mem_ctx, copies);
809
810 foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
811 copy_prop_vars_cf_node(state, &loop_copies, cf_node);
812
813 break;
814 }
815
816 default:
817 unreachable("Invalid CF node type");
818 }
819 }
820
821 static bool
822 nir_copy_prop_vars_impl(nir_function_impl *impl)
823 {
824 void *mem_ctx = ralloc_context(NULL);
825
826 struct copy_prop_var_state state = {
827 .impl = impl,
828 .mem_ctx = mem_ctx,
829 .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
830
831 .vars_written_map = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
832 _mesa_key_pointer_equal),
833 };
834
835 gather_vars_written(&state, NULL, &impl->cf_node);
836
837 copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);
838
839 if (state.progress) {
840 nir_metadata_preserve(impl, nir_metadata_block_index |
841 nir_metadata_dominance);
842 }
843
844 ralloc_free(mem_ctx);
845 return state.progress;
846 }
847
848 bool
849 nir_opt_copy_prop_vars(nir_shader *shader)
850 {
851 bool progress = false;
852
853 nir_foreach_function(function, shader) {
854 if (!function->impl)
855 continue;
856 progress |= nir_copy_prop_vars_impl(function->impl);
857 }
858
859 return progress;
860 }