src/gallium/drivers/lima/ir/gp/reduce_scheduler.c

   1 /*
   2  * Copyright (c) 2017 Lima Project
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the
  12  * next paragraph) shall be included in all copies or substantial portions
  13  * of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include <limits.h>
  26
  27 #include "gpir.h"
  28
  29 /* Register sensitive schedule algorithm from paper:
  30  * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
  31  * Author: Vivek Sarkar,  Mauricio J. Serrano,  Barbara B. Simons
  32  */
  33
  34 static void schedule_calc_sched_info(gpir_node *node)
  35 {
  36    int n = 0;
  37    float extra_reg = 1.0f;
  38
  39    /* update all children's sched info */
  40    gpir_node_foreach_pred(node, dep) {
  41       gpir_node *pred = dep->pred;
  42
  43       if (pred->rsched.reg_pressure < 0)
  44          schedule_calc_sched_info(pred);
  45
  46       int est = pred->rsched.est + 1;
  47       if (node->rsched.est < est)
  48          node->rsched.est = est;
  49
  50       float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list);
  51       if (extra_reg > reg_weight)
  52          extra_reg = reg_weight;
  53
  54       n++;
  55    }
  56
  57    /* leaf instr */
  58    if (!n) {
  59       node->rsched.reg_pressure = 0;
  60       return;
  61    }
  62
  63    int i = 0;
  64    float reg[n];
  65    gpir_node_foreach_pred(node, dep) {
  66       gpir_node *pred = dep->pred;
  67       reg[i++] = pred->rsched.reg_pressure;
  68    }
  69
  70    /* sort */
  71    for (i = 0; i < n - 1; i++) {
  72       for (int j = 0; j < n - i - 1; j++) {
  73          if (reg[j] > reg[j + 1]) {
  74             float tmp = reg[j + 1];
  75             reg[j + 1] = reg[j];
  76             reg[j] = tmp;
  77          }
  78       }
  79    }
  80
  81    for (i = 0; i < n; i++) {
  82       float pressure = reg[i] + n - (i + 1);
  83       if (pressure > node->rsched.reg_pressure)
  84          node->rsched.reg_pressure = pressure;
  85    }
  86
  87    /* If all children of this node have multi parents, then this
  88     * node need an extra reg to store its result. For example,
  89     * it's not fair for parent has the same reg pressure as child
  90     * if n==1 and child's successor>1, because we need 2 reg for
  91     * this.
  92     *
  93     * But we can't add a full reg to the reg_pressure, because the
  94     * last parent of a multi-successor child doesn't need an extra
  95     * reg. For example, a single child (with multi successor) node
  96     * should has less reg pressure than a two children (with single
  97     * successor) instr.
  98     *
  99     * extra reg = min(all child)(1.0 - 1.0 / num successor)
 100     */
 101    node->rsched.reg_pressure += extra_reg;
 102 }
 103
 104 static void schedule_insert_ready_list(struct list_head *ready_list,
 105                                        gpir_node *insert_node)
 106 {
 107    struct list_head *insert_pos = ready_list;
 108
 109    list_for_each_entry(gpir_node, node, ready_list, list) {
 110       if (gpir_op_infos[node->op].schedule_first) {
 111          continue;
 112       }
 113
 114       if (gpir_op_infos[insert_node->op].schedule_first ||
 115           insert_node->rsched.parent_index < node->rsched.parent_index ||
 116           (insert_node->rsched.parent_index == node->rsched.parent_index &&
 117            (insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||
 118             (insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&
 119              (insert_node->rsched.est >= node->rsched.est))))) {
 120          insert_pos = &node->list;
 121          break;
 122       }
 123    }
 124
 125    list_del(&insert_node->list);
 126    list_addtail(&insert_node->list, insert_pos);
 127 }
 128
 129 static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)
 130 {
 131    if (list_is_empty(ready_list))
 132       return;
 133
 134    gpir_node *node = list_first_entry(ready_list, gpir_node, list);
 135    list_del(&node->list);
 136
 137    /* schedule the node to the block node list */
 138    list_add(&node->list, &block->node_list);
 139    node->rsched.scheduled = true;
 140    block->rsched.node_index--;
 141
 142    gpir_node_foreach_pred(node, dep) {
 143       gpir_node *pred = dep->pred;
 144       pred->rsched.parent_index = block->rsched.node_index;
 145
 146       bool ready = true;
 147       gpir_node_foreach_succ(pred, dep) {
 148          gpir_node *succ = dep->succ;
 149          if (!succ->rsched.scheduled) {
 150             ready = false;
 151             break;
 152          }
 153       }
 154       /* all successor have been scheduled */
 155       if (ready)
 156          schedule_insert_ready_list(ready_list, pred);
 157    }
 158
 159    schedule_ready_list(block, ready_list);
 160 }
 161
 162 static void schedule_block(gpir_block *block)
 163 {
 164    /* move all nodes to node_list, block->node_list will
 165     * contain schedule result */
 166    struct list_head node_list;
 167    list_replace(&block->node_list, &node_list);
 168    list_inithead(&block->node_list);
 169
 170    /* step 2 & 3 */
 171    list_for_each_entry(gpir_node, node, &node_list, list) {
 172       if (gpir_node_is_root(node))
 173          schedule_calc_sched_info(node);
 174       block->rsched.node_index++;
 175    }
 176
 177    /* step 4 */
 178    struct list_head ready_list;
 179    list_inithead(&ready_list);
 180
 181    /* step 5 */
 182    list_for_each_entry_safe(gpir_node, node, &node_list, list) {
 183       if (gpir_node_is_root(node)) {
 184          node->rsched.parent_index = INT_MAX;
 185          schedule_insert_ready_list(&ready_list, node);
 186       }
 187    }
 188
 189    /* step 6 */
 190    schedule_ready_list(block, &ready_list);
 191 }
 192
 193 /* Due to how we translate from NIR, we never read a register written in the
 194  * same block (we just pass the node through instead), so we don't have to
 195  * worry about read-after-write dependencies. We do have to worry about
 196  * write-after-read though, so we add those dependencies now. For example in a
 197  * loop like this we need a dependency between the write and the read of i:
 198  *
 199  * i = ...
 200  * while (...) {
 201  *    ... = i;
 202  *    i = i + 1;
 203  * }
 204  */
 205
 206 static void add_false_dependencies(gpir_compiler *comp)
 207 {
 208    /* Make sure we allocate this only once, in case there are many values and
 209     * many blocks.
 210     */
 211    gpir_node **last_written = calloc(comp->cur_reg, sizeof(gpir_node *));
 212
 213    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
 214       list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
 215          if (node->op == gpir_op_load_reg) {
 216             gpir_load_node *load = gpir_node_to_load(node);
 217             gpir_node *store = last_written[load->reg->index];
 218             if (store && store->block == block) {
 219                gpir_node_add_dep(store, node, GPIR_DEP_WRITE_AFTER_READ);
 220             }
 221          } else if (node->op == gpir_op_store_reg) {
 222             gpir_store_node *store = gpir_node_to_store(node);
 223             last_written[store->reg->index] = node;
 224          }
 225       }
 226    }
 227
 228    free(last_written);
 229 }
 230
 231 bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)
 232 {
 233    add_false_dependencies(comp);
 234
 235    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
 236       block->rsched.node_index = 0;
 237       list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
 238          node->rsched.reg_pressure = -1;
 239          node->rsched.est = 0;
 240          node->rsched.scheduled = false;
 241       }
 242    }
 243
 244    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
 245       schedule_block(block);
 246    }
 247
 248    gpir_debug("after reduce scheduler\n");
 249    gpir_node_print_prog_seq(comp);
 250    return true;
 251 }