src/compiler/nir/nir_inline_functions.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "nir.h"
  25 #include "nir_builder.h"
  26 #include "nir_control_flow.h"
  27 #include "nir_vla.h"
  28
  29 static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
  30
  31 static bool
  32 inline_functions_block(nir_block *block, nir_builder *b,
  33                        struct set *inlined)
  34 {
  35    bool progress = false;
  36    /* This is tricky.  We're iterating over instructions in a block but, as
  37     * we go, the block and its instruction list are being split into
  38     * pieces.  However, this *should* be safe since foreach_safe always
  39     * stashes the next thing in the iteration.  That next thing will
  40     * properly get moved to the next block when it gets split, and we
  41     * continue iterating there.
  42     */
  43    nir_foreach_instr_safe(instr, block) {
  44       if (instr->type != nir_instr_type_call)
  45          continue;
  46
  47       progress = true;
  48
  49       nir_call_instr *call = nir_instr_as_call(instr);
  50       assert(call->callee->impl);
  51
  52       inline_function_impl(call->callee->impl, inlined);
  53
  54       nir_function_impl *callee_copy =
  55          nir_function_impl_clone(call->callee->impl);
  56       callee_copy->function = call->callee;
  57
  58       exec_list_append(&b->impl->locals, &callee_copy->locals);
  59       exec_list_append(&b->impl->registers, &callee_copy->registers);
  60
  61       b->cursor = nir_before_instr(&call->instr);
  62
  63       /* Rewrite all of the uses of the callee's parameters to use the call
  64        * instructions sources.  In order to ensure that the "load" happens
  65        * here and not later (for register sources), we make sure to convert it
  66        * to an SSA value first.
  67        */
  68       const unsigned num_params = call->num_params;
  69       NIR_VLA(nir_ssa_def *, params, num_params);
  70       for (unsigned i = 0; i < num_params; i++) {
  71          params[i] = nir_ssa_for_src(b, call->params[i],
  72                                      call->callee->params[i].num_components);
  73       }
  74
  75       nir_foreach_block(block, callee_copy) {
  76          nir_foreach_instr_safe(instr, block) {
  77             if (instr->type != nir_instr_type_intrinsic)
  78                continue;
  79
  80             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
  81             if (load->intrinsic != nir_intrinsic_load_param)
  82                continue;
  83
  84             unsigned param_idx = nir_intrinsic_param_idx(load);
  85             assert(param_idx < num_params);
  86             assert(load->dest.is_ssa);
  87             nir_ssa_def_rewrite_uses(&load->dest.ssa,
  88                                      nir_src_for_ssa(params[param_idx]));
  89
  90             /* Remove any left-over load_param intrinsics because they're soon
  91              * to be in another function and therefore no longer valid.
  92              */
  93             nir_instr_remove(&load->instr);
  94          }
  95       }
  96
  97       /* Pluck the body out of the function and place it here */
  98       nir_cf_list body;
  99       nir_cf_list_extract(&body, &callee_copy->body);
 100       nir_cf_reinsert(&body, b->cursor);
 101
 102       nir_instr_remove(&call->instr);
 103    }
 104
 105    return progress;
 106 }
 107
 108 static bool
 109 inline_function_impl(nir_function_impl *impl, struct set *inlined)
 110 {
 111    if (_mesa_set_search(inlined, impl))
 112       return false; /* Already inlined */
 113
 114    nir_builder b;
 115    nir_builder_init(&b, impl);
 116
 117    bool progress = false;
 118    nir_foreach_block_safe(block, impl) {
 119       progress |= inline_functions_block(block, &b, inlined);
 120    }
 121
 122    if (progress) {
 123       /* SSA and register indices are completely messed up now */
 124       nir_index_ssa_defs(impl);
 125       nir_index_local_regs(impl);
 126
 127       nir_metadata_preserve(impl, nir_metadata_none);
 128    }
 129
 130    _mesa_set_add(inlined, impl);
 131
 132    return progress;
 133 }
 134
 135 /** A pass to inline all functions in a shader into their callers
 136  *
 137  * For most use-cases, function inlining is a multi-step process.  The general
 138  * pattern employed by SPIR-V consumers and others is as follows:
 139  *
 140  *  1. nir_lower_constant_initializers(shader, nir_var_local)
 141  *
 142  *     This is needed because local variables from the callee are simply added
 143  *     to the locals list for the caller and the information about where the
 144  *     constant initializer logically happens is lost.  If the callee is
 145  *     called in a loop, this can cause the variable to go from being
 146  *     initialized once per loop iteration to being initialized once at the
 147  *     top of the caller and values to persist from one invocation of the
 148  *     callee to the next.  The simple solution to this problem is to get rid
 149  *     of constant initializers before function inlining.
 150  *
 151  *  2. nir_lower_returns(shader)
 152  *
 153  *     nir_inline_functions assumes that all functions end "naturally" by
 154  *     execution reaching the end of the function without any return
 155  *     instructions causing instant jumps to the end.  Thanks to NIR being
 156  *     structured, we can't represent arbitrary jumps to various points in the
 157  *     program which is what an early return in the callee would have to turn
 158  *     into when we inline it into the caller.  Instead, we require returns to
 159  *     be lowered which lets us just copy+paste the callee directly into the
 160  *     caller.
 161  *
 162  *  3. nir_inline_functions(shader)
 163  *
 164  *     This does the actual function inlining and the resulting shader will
 165  *     contain no call instructions.
 166  *
 167  *  4. nir_opt_deref(shader)
 168  *
 169  *     Most functions contain pointer parameters where the result of a deref
 170  *     instruction is passed in as a parameter, loaded via a load_param
 171  *     intrinsic, and then turned back into a deref via a cast.  Function
 172  *     inlining will get rid of the load_param but we are still left with a
 173  *     cast.  Running nir_opt_deref gets rid of the intermediate cast and
 174  *     results in a whole deref chain again.  This is currently required by a
 175  *     number of optimizations and lowering passes at least for certain
 176  *     variable modes.
 177  *
 178  *  5. Loop over the functions and delete all but the main entrypoint.
 179  *
 180  *     In the Intel Vulkan driver this looks like this:
 181  *
 182  *        foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
 183  *           if (func != entry_point)
 184  *              exec_node_remove(&func->node);
 185  *        }
 186  *        assert(exec_list_length(&nir->functions) == 1);
 187  *
 188  *    While nir_inline_functions does get rid of all call instructions, it
 189  *    doesn't get rid of any functions because it doesn't know what the "root
 190  *    function" is.  Instead, it's up to the individual driver to know how to
 191  *    decide on a root function and delete the rest.  With SPIR-V,
 192  *    spirv_to_nir returns the root function and so we can just use == whereas
 193  *    with GL, you may have to look for a function named "main".
 194  *
 195  *  6. nir_lower_constant_initializers(shader, ~nir_var_local)
 196  *
 197  *     Lowering constant initializers on inputs, outputs, global variables,
 198  *     etc. requires that we know the main entrypoint so that we know where to
 199  *     initialize them.  Otherwise, we would have to assume that anything
 200  *     could be a main entrypoint and initialize them at the start of every
 201  *     function but that would clearly be wrong if any of those functions were
 202  *     ever called within another function.  Simply requiring a single-
 203  *     entrypoint function shader is the best way to make it well-defined.
 204  */
 205 bool
 206 nir_inline_functions(nir_shader *shader)
 207 {
 208    struct set *inlined = _mesa_set_create(NULL, _mesa_hash_pointer,
 209                                           _mesa_key_pointer_equal);
 210    bool progress = false;
 211
 212    nir_foreach_function(function, shader) {
 213       if (function->impl)
 214          progress = inline_function_impl(function->impl, inlined) || progress;
 215    }
 216
 217    _mesa_set_destroy(inlined, NULL);
 218
 219    return progress;
 220 }