src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  * Copyright © 2015 Red Hat
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Jason Ekstrand (jason@jlekstrand.net)
  26  *    Rob Clark (robclark@freedesktop.org)
  27  *
  28  */
  29
  30 #include "ir3_nir.h"
  31 #include "compiler/nir/nir_builder.h"
  32 #include "compiler/nir/nir_control_flow.h"
  33
  34 /* Based on nir_opt_peephole_select, and hacked up to more aggressively
  35  * flatten anything that can be flattened
  36  *
  37  * This *might* be something that other drivers could use.  On the other
  38  * hand, I think most other hw has predicated instructions or similar
  39  * to select which side of if/else writes back result (and therefore
  40  * not having to assign unique registers to both sides of the if/else.
  41  * (And hopefully those drivers don't also have crazy scheduling reqs
  42  * and can more easily do this in their backend.)
  43  *
  44  * TODO eventually when we have proper flow control in the backend:
  45  *
  46  *  + Probably weight differently normal ALUs vs SFUs (cos/rcp/exp)
  47  *    since executing extra SFUs for the branch-not-taken path will
  48  *    generally be much more expensive.
  49  *
  50  *    Possibly what constitutes an ALU vs SFU differs between hw
  51  *    backends.. but that seems doubtful.
  52  *
  53  *  + Account for texture fetch and memory accesses (incl UBOs)
  54  *    since these will be more expensive..
  55  *
  56  *  + When if-condition is const (or uniform) or we have some way
  57  *    to know that all threads in the warp take the same branch
  58  *    then we should prefer to not flatten the if/else..
  59  */
  60
  61 static bool
  62 valid_dest(nir_block *block, nir_dest *dest)
  63 {
  64         /* It must be SSA */
  65         if (!dest->is_ssa)
  66                 return false;
  67
  68         /* We only lower blocks that do not contain other blocks
  69          * (so this is run iteratively in a loop).  Therefore if
  70          * we get this far, it should not have any if_uses:
  71          */
  72         assert(list_empty(&dest->ssa.if_uses));
  73
  74         /* The only uses of this definition must be phi's in the
  75          * successor or in the current block
  76          */
  77         nir_foreach_use(use, &dest->ssa) {
  78                 nir_instr *dest_instr = use->parent_instr;
  79                 if (dest_instr->block == block)
  80                         continue;
  81                 if ((dest_instr->type == nir_instr_type_phi) &&
  82                                 (dest_instr->block == block->successors[0]))
  83                         continue;
  84                 return false;
  85         }
  86
  87         return true;
  88 }
  89
  90 static bool
  91 block_check_for_allowed_instrs(nir_block *block)
  92 {
  93         nir_foreach_instr(instr, block) {
  94                 switch (instr->type) {
  95                 case nir_instr_type_intrinsic: {
  96                         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
  97                         const nir_intrinsic_info *info =
  98                                         &nir_intrinsic_infos[intr->intrinsic];
  99
 100                         switch (intr->intrinsic) {
 101                         case nir_intrinsic_discard_if:
 102                                 /* to simplify things, we want discard_if src in ssa: */
 103                                 if (!intr->src[0].is_ssa)
 104                                         return false;
 105                                 /* fallthrough */
 106                         case nir_intrinsic_discard:
 107                                 /* discard/discard_if can be reordered, but only
 108                                  * with some special care
 109                                  */
 110                                 break;
 111                         case nir_intrinsic_store_output:
 112                                 /* TODO technically, if both if and else store
 113                                  * the same output, we can hoist that out to
 114                                  * the end of the block w/ a phi..
 115                                  * In practice, the tgsi shaders we already get
 116                                  * do this for us, so I think we don't need to
 117                                  */
 118                         default:
 119                                 if (!(info->flags & NIR_INTRINSIC_CAN_REORDER))
 120                                         return false;
 121                         }
 122
 123                         break;
 124                 }
 125
 126                 case nir_instr_type_tex: {
 127                         nir_tex_instr *tex = nir_instr_as_tex(instr);
 128                         if (!valid_dest(block, &tex->dest))
 129                                 return false;
 130                         break;
 131                 }
 132                 case nir_instr_type_phi: {
 133                         nir_phi_instr *phi = nir_instr_as_phi(instr);
 134                         if (!valid_dest(block, &phi->dest))
 135                                 return false;
 136                         break;
 137                 }
 138                 case nir_instr_type_alu: {
 139                         nir_alu_instr *alu = nir_instr_as_alu(instr);
 140                         if (!valid_dest(block, &alu->dest.dest))
 141                                 return false;
 142                         break;
 143                 }
 144
 145                 case nir_instr_type_load_const:
 146                 case nir_instr_type_ssa_undef:
 147                         break; /* always ssa dest */
 148
 149                 default:
 150                         return false;
 151                 }
 152         }
 153
 154         return true;
 155 }
 156
 157 /* flatten an then or else block: */
 158 static void
 159 flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block,
 160                 nir_ssa_def *condition, bool invert)
 161 {
 162         nir_foreach_instr_safe(instr, if_block) {
 163                 if (instr->type == nir_instr_type_intrinsic) {
 164                         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 165                         if ((intr->intrinsic == nir_intrinsic_discard) ||
 166                                         (intr->intrinsic == nir_intrinsic_discard_if)) {
 167                                 nir_ssa_def *discard_cond;
 168
 169                                 bld->cursor = nir_after_instr(
 170                                                 nir_block_last_instr(prev_block));
 171
 172                                 if (invert) {
 173                                         condition = nir_inot(bld, condition);
 174                                         invert = false;
 175                                 }
 176
 177                                 if (intr->intrinsic == nir_intrinsic_discard) {
 178                                         discard_cond = condition;
 179                                 } else {
 180                                         assert(intr->src[0].is_ssa);
 181                                         /* discard_if gets re-written w/ src and'd: */
 182                                         discard_cond = nir_iand(bld, condition, intr->src[0].ssa);
 183                                 }
 184
 185                                 nir_intrinsic_instr *discard_if =
 186                                                 nir_intrinsic_instr_create(bld->shader,
 187                                                                 nir_intrinsic_discard_if);
 188                                 discard_if->src[0] = nir_src_for_ssa(discard_cond);
 189
 190                                 nir_instr_insert_after(nir_block_last_instr(prev_block),
 191                                                 &discard_if->instr);
 192                                 nir_instr_remove(instr);
 193                                 instr = NULL;
 194                         }
 195                 }
 196                 /* if not an handled specially, just move to prev block: */
 197                 if (instr) {
 198                         /* NOTE: exec_node_remove() is safe here (vs nir_instr_remove()
 199                          * since we are re-adding the instructin back in to the prev
 200                          * block (so no dangling SSA uses)
 201                          */
 202                         exec_node_remove(&instr->node);
 203                         instr->block = prev_block;
 204                         exec_list_push_tail(&prev_block->instr_list, &instr->node);
 205                 }
 206         }
 207 }
 208
 209 static bool
 210 lower_if_else_block(nir_block *block, nir_builder *b, void *mem_ctx)
 211 {
 212         /* If the block is empty, then it certainly doesn't have any phi nodes,
 213          * so we can skip it.  This also ensures that we do an early skip on the
 214          * end block of the function which isn't actually attached to the CFG.
 215          */
 216         if (exec_list_is_empty(&block->instr_list))
 217                 return false;
 218
 219         if (nir_cf_node_is_first(&block->cf_node))
 220                 return false;
 221
 222         nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
 223         if (prev_node->type != nir_cf_node_if)
 224                 return false;
 225
 226         nir_if *if_stmt = nir_cf_node_as_if(prev_node);
 227         nir_block *then_block = nir_if_first_then_block(if_stmt);
 228         nir_block *else_block = nir_if_first_else_block(if_stmt);
 229
 230         /* We can only have one block in each side ... */
 231         if (nir_if_last_then_block(if_stmt) != then_block ||
 232                         nir_if_last_else_block(if_stmt) != else_block)
 233                 return false;
 234
 235         /* ... and those blocks must only contain "allowed" instructions. */
 236         if (!block_check_for_allowed_instrs(then_block) ||
 237                         !block_check_for_allowed_instrs(else_block))
 238                 return false;
 239
 240         /* condition should be ssa too, which simplifies flatten_block: */
 241         if (!if_stmt->condition.is_ssa)
 242                 return false;
 243
 244         /* At this point, we know that the previous CFG node is an if-then
 245          * statement containing only moves to phi nodes in this block.  We can
 246          * just remove that entire CF node and replace all of the phi nodes with
 247          * selects.
 248          */
 249
 250         nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
 251         assert(prev_block->cf_node.type == nir_cf_node_block);
 252
 253         /* First, we move the remaining instructions from the blocks to the
 254          * block before.  There are a few things that need handling specially
 255          * like discard/discard_if.
 256          */
 257         flatten_block(b, then_block, prev_block,
 258                         if_stmt->condition.ssa, false);
 259         flatten_block(b, else_block, prev_block,
 260                         if_stmt->condition.ssa, true);
 261
 262         nir_foreach_instr_safe(instr, block) {
 263                 if (instr->type != nir_instr_type_phi)
 264                         break;
 265
 266                 nir_phi_instr *phi = nir_instr_as_phi(instr);
 267                 nir_alu_instr *sel = nir_alu_instr_create(mem_ctx, nir_op_bcsel);
 268                 nir_src_copy(&sel->src[0].src, &if_stmt->condition, mem_ctx);
 269                 /* Splat the condition to all channels */
 270                 memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
 271
 272                 assert(exec_list_length(&phi->srcs) == 2);
 273                 nir_foreach_phi_src(src, phi) {
 274                         assert(src->pred == then_block || src->pred == else_block);
 275                         assert(src->src.is_ssa);
 276
 277                         unsigned idx = src->pred == then_block ? 1 : 2;
 278                         nir_src_copy(&sel->src[idx].src, &src->src, mem_ctx);
 279                 }
 280
 281                 nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
 282                                 phi->dest.ssa.num_components, 32, phi->dest.ssa.name);
 283                 sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 284
 285                 nir_ssa_def_rewrite_uses(&phi->dest.ssa,
 286                                 nir_src_for_ssa(&sel->dest.dest.ssa));
 287
 288                 nir_instr_insert_before(&phi->instr, &sel->instr);
 289                 nir_instr_remove(&phi->instr);
 290         }
 291
 292         nir_cf_node_remove(&if_stmt->cf_node);
 293         return true;
 294 }
 295
 296 static bool
 297 lower_if_else_impl(nir_function_impl *impl)
 298 {
 299         void *mem_ctx = ralloc_parent(impl);
 300         nir_builder b;
 301         nir_builder_init(&b, impl);
 302
 303         bool progress = false;
 304         nir_foreach_block_safe(block, impl) {
 305                 progress |= lower_if_else_block(block, &b, mem_ctx);
 306         }
 307
 308         if (progress)
 309                 nir_metadata_preserve(impl, nir_metadata_none);
 310
 311         return progress;
 312 }
 313
 314 bool
 315 ir3_nir_lower_if_else(nir_shader *shader)
 316 {
 317         bool progress = false;
 318
 319         nir_foreach_function(function, shader) {
 320                 if (function->impl)
 321                         progress |= lower_if_else_impl(function->impl);
 322         }
 323
 324         return progress;
 325 }