/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "util/u_debug.h"

#include "ppir.h"
#include "lima_context.h"
/* Number of physical vec4 temporary registers available to the PP. */
#define PPIR_FULL_REG_NUM 6

/* Virtual register counts per vector size: a value narrower than vec4
 * can live at several component offsets inside one physical register. */
#define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
#define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
#define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
#define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM       /* xyzw */

/* "head" variants must start at component x of a physical register
 * (load/store instructions have no swizzle field), so there is exactly
 * one candidate per physical register. */
#define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM  /* x */
#define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM  /* xy */
#define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM  /* xyz */
#define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM  /* xyzw */

/* Contiguous virtual register numbering: each class occupies the range
 * [BASE, BASE + NUM) within the flat RA register space. */
#define PPIR_VEC1_REG_BASE 0
#define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
#define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
#define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
#define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
#define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
#define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
#define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
#define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
/* Register classes handed to the register allocator, one per vector
 * width, plus "head" classes for registers that must start at x. */
enum ppir_ra_reg_class {
   ppir_ra_reg_class_vec1,
   ppir_ra_reg_class_vec2,
   ppir_ra_reg_class_vec3,
   ppir_ra_reg_class_vec4,

   /* 4 reg class for load/store instr regs:
    * load/store instr has no swizzle field, so the (virtual) register
    * must be allocated at the beginning of a (physical) register,
    */
   ppir_ra_reg_class_head_vec1,
   ppir_ra_reg_class_head_vec2,
   ppir_ra_reg_class_head_vec3,
   ppir_ra_reg_class_head_vec4,

   ppir_ra_reg_class_num,
};
/* First virtual register number of each class.  The extra
 * [ppir_ra_reg_class_num] entry holds the total register count, so
 * ppir_ra_reg_base[i + 1] is always a valid end bound for class i. */
static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
   [ppir_ra_reg_class_vec1]      = PPIR_VEC1_REG_BASE,
   [ppir_ra_reg_class_vec2]      = PPIR_VEC2_REG_BASE,
   [ppir_ra_reg_class_vec3]      = PPIR_VEC3_REG_BASE,
   [ppir_ra_reg_class_vec4]      = PPIR_VEC4_REG_BASE,
   [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
   [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
   [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
   [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
   [ppir_ra_reg_class_num]       = PPIR_REG_COUNT,
};
/* q values for the register allocator (see util/register_allocate.c):
 * q[B][C] bounds how many class-B registers a single class-C register
 * can conflict with.
 * NOTE(review): the declarator line was lost in extraction and was
 * reconstructed as `static const unsigned int *` — confirm upstream. */
static const unsigned int *
ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
   (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
   (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
   (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
};
/* Build the shared RA register set: PPIR_REG_COUNT virtual registers
 * partitioned into the classes above, with conflicts between every
 * virtual register and the vec1 component registers it overlaps.
 * Returns NULL on allocation failure.  Owned by mem_ctx (ralloc). */
struct ra_regs *ppir_regalloc_init(void *mem_ctx)
{
   struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
   if (!ret)
      return NULL;

   /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
   static const int class_reg_num[ppir_ra_reg_class_num] = {
      4, 3, 2, 1, 1, 1, 1, 1,
   };

   /* base reg (x, y, z, w) confliction with other regs */
   for (int h = 0; h < 4; h++) {
      int base_reg_mask = 1 << h;
      for (int i = 1; i < ppir_ra_reg_class_num; i++) {
         /* component footprint of a class-i register: i%4+1 bits wide */
         int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
         for (int j = 0; j < class_reg_num[i]; j++) {
            if (base_reg_mask & (class_reg_base_mask << j)) {
               for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
                  ra_add_reg_conflict(ret, k * 4 + h,
                     ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
               }
            }
         }
      }
   }

   /* build all other confliction by the base reg confliction */
   for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
      ra_make_reg_conflicts_transitive(ret, i);

   for (int i = 0; i < ppir_ra_reg_class_num; i++)
      ra_alloc_reg_class(ret);

   /* assign each class its contiguous register range */
   int reg_index = 0;
   for (int i = 0; i < ppir_ra_reg_class_num; i++) {
      while (reg_index < ppir_ra_reg_base[i + 1])
         ra_class_add_reg(ret, i, reg_index++);
   }

   ra_set_finalize(ret, ppir_ra_reg_q_values);
   return ret;
}
/* SSA values live in their defining node's dest; collect them into
 * comp->reg_list so the allocator sees them alongside array regs. */
static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         if (node->op == ppir_op_store_color)
            continue;

         /* unscheduled nodes and consts have no allocatable dest */
         if (!node->instr || node->op == ppir_op_const)
            continue;

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest) {
            ppir_reg *reg = NULL;

            if (dest->type == ppir_target_ssa) {
               reg = &dest->ssa;
               list_addtail(&reg->list, &comp->reg_list);
            }
         }
      }
   }
}
/* Map a virtual RA register number back to a physical location encoded
 * as (physical reg * 4 + start component). */
static int get_phy_reg_index(int reg)
{
   int i;

   /* find which class 'reg' falls in and make it class-relative */
   for (i = 0; i < ppir_ra_reg_class_num; i++) {
      if (reg < ppir_ra_reg_base[i + 1]) {
         reg -= ppir_ra_reg_base[i];
         break;
      }
   }

   if (i < ppir_ra_reg_class_head_vec1)
      /* vecN class i has (4 - i) start offsets per physical register */
      return reg / (4 - i) * 4 + reg % (4 - i);
   else
      /* head classes: one candidate per physical reg, always at x */
      return reg * 4;
}
/* Debug dump of the allocation result: one line per instruction with
 * "(node_index|dest_reg|src_regs...)" for each occupied slot. */
static void ppir_regalloc_print_result(ppir_compiler *comp)
{
   printf("======ppir regalloc result======\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d:", instr->index);
         for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
            ppir_node *node = instr->slots[i];
            if (!node)
               continue;

            printf(" (%d|", node->index);

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest)
               printf("%d", ppir_target_get_dest_reg_index(dest));

            printf("|");

            /* note: inner 'i' intentionally shadows the slot index */
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               if (i)
                  printf(" ");

               printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
            }

            printf(")");
         }
         printf("\n");
      }
   }
   printf("--------------------------\n");
}
/* Create a fresh instruction holding only 'node' and place it right
 * after 'ref' in the block; bumps the seq of all following instrs so
 * scheduling order stays consistent.  Returns false on failure. */
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
                                   ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   /* ppir_instr_create appends at block tail; move it after 'ref' */
   list_del(&newinstr->list);
   list_add(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq+1;
   newinstr->scheduled = true;
   return true;
}
/* Create a fresh instruction holding only 'node' and place it right
 * before 'ref' in the block; bumps the seq of 'ref' and all following
 * instrs.  Returns false on failure. */
static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
                                    ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   /* ppir_instr_create appends at block tail; move it before 'ref' */
   list_del(&newinstr->list);
   list_addtail(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq-1;
   newinstr->scheduled = true;
   return true;
}
/* Rewrite one source of 'node' that reads the spilled register: load
 * the value back from temporary memory and retarget the source at the
 * loaded value.  *fill_node caches the reload so multiple sources of
 * the same node reuse a single load.  Returns false on failure. */
static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
                                    ppir_node *node, ppir_src *src,
                                    ppir_node **fill_node)
{
   /* nodes might have multiple references to the same value.
    * avoid creating unnecessary loads for the same fill by
    * saving the node resulting from the temporary load */
   if (*fill_node)
      goto update_src;

   int num_components = src->reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (unlikely(!load_node))
      return false;
   list_addtail(&load_node->list, &node->list);

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->stack_size; /* index sizes are negative */
   load->num_components = num_components;

   /* load lands in the uniform pipeline register, not a temp reg */
   ppir_dest *ld_dest = &load->dest;
   ld_dest->type = ppir_target_pipeline;
   ld_dest->pipeline = ppir_pipeline_reg_uniform;
   ld_dest->write_mask = u_bit_consecutive(0, num_components);

   /* If the uniform slot is empty, we can insert the load_temp
    * there and use it directly. Exceptionally, if the node is in the
    * varying or texld slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
       node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
       node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
      ppir_node_target_assign(src, load_node);
      *fill_node = load_node;
      return ppir_instr_insert_node(node->instr, load_node);
   }

   /* Uniform slot was taken, so fall back to a new instruction with a mov */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   ppir_dest *alu_dest = &move_alu->dest;
   alu_dest->type = ppir_target_ssa;
   alu_dest->ssa.num_components = num_components;
   alu_dest->ssa.spilled = true; /* don't consider it for spilling again */
   alu_dest->write_mask = u_bit_consecutive(0, num_components);

   /* register the new SSA value so later RA rounds allocate it */
   list_addtail(&alu_dest->ssa.list, &comp->reg_list);

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   /* insert the new node as predecessor */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   *fill_node = move_node;

update_src:
   /* switch node src to use the fill node dest */
   ppir_node_target_assign(src, *fill_node);

   return true;
}
/* Before a partial write to a spilled register dest, reload the current
 * value from temporary memory (load_temp -> uniform pipeline -> mov to
 * the register) so unwritten components keep their value.  Returns
 * false on failure. */
static bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
                                          ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   assert(dest->type == ppir_target_register);
   ppir_reg *reg = dest->reg;
   int num_components = reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (unlikely(!load_node))
      return false;
   list_addtail(&load_node->list, &node->list);

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->stack_size; /* index sizes are negative */
   load->num_components = num_components;

   load->dest.type = ppir_target_pipeline;
   load->dest.pipeline = ppir_pipeline_reg_uniform;
   load->dest.write_mask = u_bit_consecutive(0, num_components);

   /* New instruction is needed since we're updating a dest register
    * and we can't write to the uniform pipeline reg */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   /* mov writes the reloaded value back into the spilled register */
   move_alu->dest.type = ppir_target_register;
   move_alu->dest.reg = reg;
   move_alu->dest.write_mask = u_bit_consecutive(0, num_components);

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   /* reroute node's predecessors through the reload chain */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   return true;
}
/* After a write to the spilled register, store the value out to
 * temporary memory, reusing the instruction's store_temp slot when
 * possible or appending a new instruction otherwise.  Returns false on
 * failure. */
static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
                                     ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   ppir_reg *reg = ppir_dest_get_reg(dest);

   /* alloc new node to store value */
   ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
   if (unlikely(!store_node))
      return false;
   list_addtail(&store_node->list, &node->list);

   ppir_store_node *store = ppir_node_to_store(store_node);

   store->index = -comp->prog->stack_size; /* index sizes are negative */

   ppir_node_target_assign(&store->src, node);
   store->num_components = reg->num_components;

   /* insert the new node as successor */
   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(succ, store_node, ppir_dep_src);
   }
   ppir_node_add_dep(store_node, node, ppir_dep_src);

   /* If the store temp slot is empty, we can insert the store_temp
    * there and use it directly. Exceptionally, if the node is in the
    * combine slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
       node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
      return ppir_instr_insert_node(node->instr, store_node);

   /* Not possible to merge store, so fall back to a new instruction */
   return create_new_instr_after(block, node->instr, store_node);
}
/* Rewrite every def and use of 'chosen' to go through temporary
 * memory: a store after each def, a load before each use.  Returns
 * false if any rewrite fails. */
static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest && ppir_dest_get_reg(dest) == chosen) {
            /* If dest is a register, it might be updating only some its
             * components, so need to load the existing value first */
            if (dest->type == ppir_target_register) {
               if (!ppir_update_spilled_dest_load(comp, block, node))
                  return false;
            }
            if (!ppir_update_spilled_dest(comp, block, node))
               return false;
         }

         ppir_node *fill_node = NULL;
         /* nodes might have multiple references to the same value.
          * avoid creating unnecessary loads for the same fill by
          * saving the node resulting from the temporary load */
         for (int i = 0; i < ppir_node_get_src_num(node); i++) {
            ppir_src *src = ppir_node_get_src(node, i);
            ppir_reg *reg = ppir_src_get_reg(src);
            if (reg == chosen) {
               if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
                  return false;
            }
         }
      }
   }

   return true;
}
/* Assign a spill cost to every register and ask the RA for the best
 * spill candidate.  Cheaper to spill: wide registers; more expensive:
 * narrow registers and registers touched by instructions whose
 * uniform/store_temp slot is already occupied (spilling those forces
 * extra instructions).  Returns NULL if nothing can be spilled. */
static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
                                                 struct ra_graph *g)
{
   float spill_costs[list_length(&comp->reg_list)];
   /* experimentally determined, it seems to be worth scaling cost of
    * regs in instructions that have used uniform/store_temp slots,
    * but not too much as to offset the num_components base cost. */
   const float slot_scale = 1.1f;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      /* NOTE(review): guard reconstructed — already-spilled regs get
       * zero cost; confirm the exact condition against upstream */
      if (reg->spilled) {
         /* not considered for spilling */
         spill_costs[reg->regalloc_index] = 0.0f;
         continue;
      }

      /* It is beneficial to spill registers with higher component number,
       * so increase the cost of spilling registers with few components */
      float spill_cost = 4.0f / (float)reg->num_components;
      spill_costs[reg->regalloc_index] = spill_cost;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
            /* uniform slot busy: reloading any reg sourced here would
             * need an extra mov instruction */
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;

               for (int j = 0; j < ppir_node_get_src_num(node); j++) {
                  ppir_src *src = ppir_node_get_src(node, j);
                  if (!src)
                     continue;

                  ppir_reg *reg = ppir_src_get_reg(src);
                  if (!reg)
                     continue;

                  spill_costs[reg->regalloc_index] *= slot_scale;
               }
            }
         }
         if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
            /* store_temp slot busy: spilling any reg written here would
             * need an extra instruction */
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;

               ppir_dest *dest = ppir_node_get_dest(node);
               if (!dest)
                  continue;

               ppir_reg *reg = ppir_dest_get_reg(dest);
               if (!reg)
                  continue;

               spill_costs[reg->regalloc_index] *= slot_scale;
            }
         }
      }
   }

   for (int i = 0; i < list_length(&comp->reg_list); i++)
      ra_set_node_spill_cost(g, i, spill_costs[i]);

   int r = ra_get_best_spill_node(g);
   if (r == -1)
      return NULL;

   ppir_reg *chosen = NULL;
   int i = 0;
   /* NOTE(review): lookup reconstructed — RA node index equals the
    * reg's position in reg_list (same order as regalloc_index) */
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      if (i++ == r) {
         chosen = reg;
         break;
      }
   }
   assert(chosen);

   chosen->spilled = true;
   chosen->is_head = true; /* store_temp unable to do swizzle */

   return chosen;
}
/* Renumber regalloc indices and (re)allocate all per-block and
 * per-instruction liveness storage sized by the current reg_list
 * length.  Called before every allocation attempt because spilling
 * grows the register list. */
static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
{
   int idx = 0;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      reg->regalloc_index = idx++;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* ralloc_free(NULL) is a no-op, so no guard is needed here */
      ralloc_free(block->live_in);
      block->live_in = rzalloc_array(comp,
            struct ppir_liveness, list_length(&comp->reg_list));

      if (block->live_in_set)
         _mesa_set_destroy(block->live_in_set, NULL);
      /* NOTE(review): hash callback line was lost in extraction;
       * _mesa_hash_pointer is the standard pairing with
       * _mesa_key_pointer_equal — confirm upstream */
      block->live_in_set = _mesa_set_create(comp,
                                            _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

      ralloc_free(block->live_out);
      block->live_out = rzalloc_array(comp,
            struct ppir_liveness, list_length(&comp->reg_list));

      if (block->live_out_set)
         _mesa_set_destroy(block->live_out_set, NULL);
      block->live_out_set = _mesa_set_create(comp,
                                             _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);

      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         ralloc_free(instr->live_in);
         instr->live_in = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_in_set)
            _mesa_set_destroy(instr->live_in_set, NULL);
         instr->live_in_set = _mesa_set_create(comp,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

         if (instr->live_internal)
            ralloc_free(instr->live_internal);
         instr->live_internal = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_internal_set)
            _mesa_set_destroy(instr->live_internal_set, NULL);
         instr->live_internal_set = _mesa_set_create(comp,
                                                     _mesa_hash_pointer,
                                                     _mesa_key_pointer_equal);

         ralloc_free(instr->live_out);
         instr->live_out = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_out_set)
            _mesa_set_destroy(instr->live_out_set, NULL);
         instr->live_out_set = _mesa_set_create(comp,
                                                _mesa_hash_pointer,
                                                _mesa_key_pointer_equal);
      }
   }
}
/* Add pairwise interference between every register in 'liveness'.
 * The set is consumed: each entry is removed once the outer loop has
 * paired it with all remaining entries (each pair is added once). */
static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
                                  struct set *liveness)
{
   set_foreach(liveness, entry1) {
      set_foreach(liveness, entry2) {
         const struct ppir_liveness *r1 = entry1->key;
         const struct ppir_liveness *r2 = entry2->key;
         ra_add_node_interference(g, r1->reg->regalloc_index,
                                  r2->reg->regalloc_index);
      }
      _mesa_set_remove(liveness, entry1);
   }
}
/* Number of forced spill rounds; set from an environment variable in
 * lima_screen.c for debugging the spilling code. */
int lima_ppir_force_spilling = 0;
/* Run one register-allocation attempt.  Returns true on success; on
 * failure *spilled reports whether a register was spilled (caller
 * should retry) or allocation failed for good. */
static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
{
   ppir_regalloc_reset_liveness_info(comp);

   struct ra_graph *g = ra_alloc_interference_graph(
      comp->ra, list_length(&comp->reg_list));

   int n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
      /* NOTE(review): reconstructed — head classes follow the four
       * plain vec classes in the enum, hence the +4; confirm upstream */
      if (reg->is_head)
         c += 4;
      ra_set_node_class(g, n++, c);
   }

   ppir_liveness_analysis(comp);

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* values live only inside the instr still conflict with
          * everything live across it */
         set_foreach(instr->live_internal_set, entry) {
            _mesa_set_add(instr->live_in_set, entry->key);
            _mesa_set_add(instr->live_out_set, entry->key);
         }
         ppir_all_interference(comp, g, instr->live_in_set);
         ppir_all_interference(comp, g, instr->live_out_set);
      }
   }

   *spilled = false;
   bool ok = ra_allocate(g);
   if (!ok || (comp->force_spilling-- > 0)) {
      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
      if (chosen) {
         /* stack_size will be used to assemble the frame reg in lima_draw.
          * It is also be used in the spilling code, as negative indices
          * starting from -1, to create stack addresses. */
         comp->prog->stack_size++;
         if (!ppir_regalloc_spill_reg(comp, chosen))
            return false;
         /* Ask the outer loop to call back in. */
         *spilled = true;

         ppir_debug("spilled register %d/%d, num_components: %d\n",
                    chosen->regalloc_index, list_length(&comp->reg_list),
                    chosen->num_components);
         return false;
      }

      ppir_error("regalloc fail\n");
      return false;
   }

   /* success: translate RA results back to physical reg indices */
   n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int reg_index = ra_get_node_reg(g, n++);
      reg->index = get_phy_reg_index(reg_index);
   }

   /* NOTE(review): freeing of g on the success path reconstructed from
    * the dropped region — confirm upstream; the failure paths rely on
    * the ralloc context for cleanup */
   ralloc_free(g);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_regalloc_print_result(comp);

   return true;
}
713 bool ppir_regalloc_prog(ppir_compiler
*comp
)
715 bool spilled
= false;
716 comp
->prog
->stack_size
= 0;
718 /* Set from an environment variable to force spilling
719 * for debugging purposes, see lima_screen.c */
720 comp
->force_spilling
= lima_ppir_force_spilling
;
722 ppir_regalloc_update_reglist_ssa(comp
);
724 /* No registers? Probably shader consists of discard instruction */
725 if (list_is_empty(&comp
->reg_list
))
728 /* this will most likely succeed in the first
729 * try, except for very complicated shaders */
730 while (!ppir_regalloc_prog_try(comp
, &spilled
))