freedreno/ir3: try to avoid syncs
src/freedreno/ir3/ir3_postsched.c
/*
 * Copyright (C) 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */


#include "util/dag.h"
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_context.h"

#ifdef DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif
#define d(fmt, ...) do { if (SCHED_DEBUG) { \
      printf("PSCHED: "fmt"\n", ##__VA_ARGS__); \
   } } while (0)

#define di(instr, fmt, ...) do { if (SCHED_DEBUG) { \
      printf("PSCHED: "fmt": ", ##__VA_ARGS__); \
      ir3_print_instr(instr); \
   } } while (0)

/*
 * Post RA Instruction Scheduling
 */

struct ir3_postsched_ctx {
   struct ir3_context *ctx;

   void *mem_ctx;
   struct ir3_block *block;           /* the current block */
   struct dag *dag;

   struct list_head unscheduled_list; /* unscheduled instructions */
   struct ir3_instruction *scheduled; /* last scheduled instr */
   struct ir3_instruction *pred;      /* current p0.x user, if any */

   bool error;

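   /* Counts down from 8 after an SFU instruction is scheduled: while it is
    * non-zero, an instruction consuming an SFU result would still need a
    * relatively costly (ss) sync, so would_sync() tries to defer it.
    */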
   int sfu_delay;
};

struct ir3_postsched_node {
   struct dag_node dag;     /* must be first for util_dynarray_foreach */
   struct ir3_instruction *instr;
   bool partially_evaluated_path;

   unsigned delay;
   unsigned max_delay;
};

#define foreach_sched_node(__n, __list) \
   list_for_each_entry(struct ir3_postsched_node, __n, __list, dag.link)

#define foreach_bit(b, mask) \
   for (uint32_t _m = ({debug_assert((mask) >= 1); (mask);}); _m && ({(b) = u_bit_scan(&_m); 1;});)

static void
schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   debug_assert(ctx->block == instr->block);

   /* remove from unscheduled_list:
    */
   list_delinit(&instr->node);

   if (writes_pred(instr)) {
      ctx->pred = instr;
   }

   di(instr, "schedule");

   list_addtail(&instr->node, &instr->block->instr_list);
   ctx->scheduled = instr;

   if (is_sfu(instr)) {
      ctx->sfu_delay = 8;
   } else if (ctx->sfu_delay > 0) {
      ctx->sfu_delay--;
   }

   struct ir3_postsched_node *n = instr->data;
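   /* Removing the scheduled node from the DAG promotes any of its children
    * whose remaining dependencies are satisfied to the heads (ready) list:
    */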
   dag_prune_head(ctx->dag, &n->dag);
}

static void
dump_state(struct ir3_postsched_ctx *ctx)
{
   if (!SCHED_DEBUG)
      return;

   foreach_sched_node (n, &ctx->dag->heads) {
      di(n->instr, "maxdel=%3d ", n->max_delay);

      util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
         struct ir3_postsched_node *child =
            (struct ir3_postsched_node *)edge->child;

         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
      }
   }
}

/* Determine if this is an instruction that we'd prefer not to schedule
 * yet, in order to avoid an (ss) sync.  This is limited by the sfu_delay
 * counter, ie. the more cycles it has been since the last SFU, the less
 * costly a sync would be.
 */
static bool
would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->sfu_delay) {
      struct ir3_register *reg;
      foreach_src (reg, instr)
         if (reg->instr && is_sfu(reg->instr))
            return true;
   }

   return false;
}

/* find instruction to schedule: */
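/* Candidates are tried in decreasing priority order: meta instructions,
 * then inputs, then kills and expensive (sfu/tex) instructions that are
 * hard-ready, then instructions that can go without forcing an (ss) sync,
 * then soft-ready, then hard-ready, and finally whichever remaining leader
 * has the largest max_delay (longest accumulated delay to the end of the
 * block).
 */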
static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_node *chosen = NULL;

   dump_state(ctx);

   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_meta(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen->instr;
   }

   /* Try to schedule inputs with a higher priority, if possible, as
    * the last bary.f unlocks varying storage to unblock more VS
    * warps.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_input(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (input)");
      return chosen->instr;
   }

   /* Next prioritize discards: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);

      if (d > 0)
         continue;

      if (!is_kill(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (kill, hard ready)");
      return chosen->instr;
   }

   /* Next prioritize expensive instructions: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);

      if (d > 0)
         continue;

      if (!(is_sfu(n->instr) || is_tex(n->instr)))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (sfu/tex, hard ready)");
      return chosen->instr;
   }

   /*
    * Sometimes it is better to take a nop rather than schedule an
    * instruction that would require an (ss) shortly after another
    * SFU, ie. if the last SFU was just one or two instructions ago,
    * and we could either take a nop and then schedule something
    * else, or schedule the immediately-available instruction that
    * would require an (ss), we are better off with the nop.
    */
   for (unsigned delay = 0; delay < 4; delay++) {
      foreach_sched_node (n, &ctx->dag->heads) {
         if (would_sync(ctx, n->instr))
            continue;

         unsigned d = ir3_delay_calc(ctx->block, n->instr, true, false);

         if (d > delay)
            continue;

         if (!chosen || (chosen->max_delay < n->max_delay))
            chosen = n;
      }

      if (chosen) {
         di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay);
         return chosen->instr;
      }
   }

   /* Next try to find a ready leader w/ soft delay (ie. including extra
    * delay for things like tex fetch, which can be synchronized w/ the
    * sync bit, but we probably do want to schedule some other
    * instructions while we wait):
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d = ir3_delay_calc(ctx->block, n->instr, true, false);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (soft ready)");
      return chosen->instr;
   }

   /* Next try to find a ready leader that can be scheduled without nop's,
    * which in the case of things that need (sy)/(ss) could result in
    * stalls.. but we've already decided there is not a better option.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d = ir3_delay_calc(ctx->block, n->instr, false, false);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (hard ready)");
      return chosen->instr;
   }

   /* Otherwise choose leader with maximum cost:
    *
    * TODO should we try to balance cost and delays?  I guess it is
    * a balance between now-nop's and future-nop's?
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!chosen || chosen->max_delay < n->max_delay)
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (leader)");
      return chosen->instr;
   }

   return NULL;
}

struct ir3_postsched_deps_state {
   struct ir3_context *ctx;

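   /* Whether we are walking the block forward (F) or in reverse (R): */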
   enum { F, R } direction;

   bool merged;

   /* Track the sched node (instruction) that last wrote a given register
    * (in whichever direction we are iterating the block).
    *
    * Note, this table is twice as big as the # of regs, to deal with
    * half-precision regs.  The approach differs depending on whether the
    * half and full precision register files are "merged" (ie. conflict,
    * a6xx+): in that case we treat each full precision dep as two
    * half-precision dependencies, whereas on older gens with separate
    * (non-conflicting) files the first half of the table is used for
    * full precision and the 2nd half for half-precision.
    */
   struct ir3_postsched_node *regs[2 * 256];
};

/* bounds checking read/write accessors, since OoB access to stuff on
 * the stack is gonna cause a bad day.
 */
#define dep_reg(state, idx) *({ \
   assert((idx) < ARRAY_SIZE((state)->regs)); \
   &(state)->regs[(idx)]; \
   })

static void
add_dep(struct ir3_postsched_deps_state *state,
      struct ir3_postsched_node *before,
      struct ir3_postsched_node *after)
{
   if (!before || !after)
      return;

   assert(before != after);

   if (state->direction == F) {
      dag_add_edge(&before->dag, &after->dag, NULL);
   } else {
      dag_add_edge(&after->dag, &before->dag, NULL);
   }
}

static void
add_single_reg_dep(struct ir3_postsched_deps_state *state,
      struct ir3_postsched_node *node, unsigned num, bool write)
{
   add_dep(state, dep_reg(state, num), node);
   if (write) {
      dep_reg(state, num) = node;
   }
}

/* This is where we handle full vs half-precision, and the potential conflicts
 * between half and full precision that result in additional dependencies.
 * The 'reg' arg is really just to know half vs full precision.
 */
static void
add_reg_dep(struct ir3_postsched_deps_state *state,
      struct ir3_postsched_node *node, const struct ir3_register *reg,
      unsigned num, bool write)
{
   if (state->merged) {
      if (reg->flags & IR3_REG_HALF) {
         /* single conflict in half-reg space: */
         add_single_reg_dep(state, node, num, write);
      } else {
         /* two conflicts in half-reg space: */
         add_single_reg_dep(state, node, 2 * num + 0, write);
         add_single_reg_dep(state, node, 2 * num + 1, write);
      }
   } else {
      if (reg->flags & IR3_REG_HALF)
         num += ARRAY_SIZE(state->regs) / 2;
      add_single_reg_dep(state, node, num, write);
   }
}

static void
calculate_deps(struct ir3_postsched_deps_state *state,
      struct ir3_postsched_node *node)
{
   static const struct ir3_register half_reg = { .flags = IR3_REG_HALF };
   struct ir3_register *reg;
   int b;

   /* Add dependencies on instructions that previously (or next,
    * in the reverse direction) wrote any of our src registers:
    */
   foreach_src_n (reg, i, node->instr) {
      /* NOTE: relative access for a src can be either const or gpr: */
      if (reg->flags & IR3_REG_RELATIV) {
         /* also reads a0.x: */
         add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
      }

      if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
         continue;

      if (reg->flags & IR3_REG_RELATIV) {
         /* mark entire array as read: */
         struct ir3_array *arr = ir3_lookup_array(state->ctx->ir, reg->array.id);
         for (unsigned i = 0; i < arr->length; i++) {
            add_reg_dep(state, node, reg, arr->reg + i, false);
         }
      } else {
         foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, false);

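            /* In the forward direction, also record how many delay slots
             * the producer needs before this consumer, so max_delay can
             * weight nodes by critical-path length:
             */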
            struct ir3_postsched_node *dep = dep_reg(state, reg->num + b);
            if (dep && (state->direction == F)) {
               unsigned d = ir3_delayslots(dep->instr, node->instr, i, true);
               node->delay = MAX2(node->delay, d);
            }
         }
      }
   }

   if (dest_regs(node->instr) == 0)
      return;

   /* And then update the state for what this instruction wrote: */
   reg = node->instr->regs[0];
   if (reg->flags & IR3_REG_RELATIV) {
      /* mark the entire array as written: */
      struct ir3_array *arr = ir3_lookup_array(state->ctx->ir, reg->array.id);
      for (unsigned i = 0; i < arr->length; i++) {
         add_reg_dep(state, node, reg, arr->reg + i, true);
      }

      /* also reads a0.x: */
      add_reg_dep(state, node, &half_reg, regid(REG_A0, 0), false);
   } else {
      foreach_bit (b, reg->wrmask) {
         add_reg_dep(state, node, reg, reg->num + b, true);
      }
   }
}

static void
calculate_forward_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx->ctx,
      .direction = F,
      .merged = ctx->ctx->compiler->gpu_id >= 600,
   };

   foreach_instr (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx->ctx,
      .direction = R,
      .merged = ctx->ctx->compiler->gpu_id >= 600,
   };

   foreach_instr_rev (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   struct ir3_postsched_node *n = rzalloc(ctx->mem_ctx, struct ir3_postsched_node);

   dag_init_node(ctx->dag, &n->dag);

   n->instr = instr;
   instr->data = n;
}

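/* Called bottom-up over the DAG, so each node's max_delay becomes the
 * accumulated delay along the longest path from this node to the end of
 * the block (its own delay plus the largest max_delay of its children):
 */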
static void
sched_dag_max_delay_cb(struct dag_node *node, void *state)
{
   struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
   uint32_t max_delay = 0;

   util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
      struct ir3_postsched_node *child = (struct ir3_postsched_node *)edge->child;
      max_delay = MAX2(child->max_delay, max_delay);
   }

   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
}

static void
sched_dag_init(struct ir3_postsched_ctx *ctx)
{
   ctx->mem_ctx = ralloc_context(NULL);

   ctx->dag = dag_create(ctx->mem_ctx);

   foreach_instr (instr, &ctx->unscheduled_list)
      sched_node_init(ctx, instr);

   calculate_forward_deps(ctx);
   calculate_reverse_deps(ctx);

   /*
    * Normal srcs won't be in SSA at this point, those are dealt with in
    * calculate_forward_deps() and calculate_reverse_deps().  But we still
    * have the false-dep information in SSA form, so go ahead and add
    * dependencies for that here:
    */
   foreach_instr (instr, &ctx->unscheduled_list) {
      struct ir3_postsched_node *n = instr->data;
      struct ir3_instruction *src;

      foreach_ssa_src_n (src, i, instr) {
         /* don't consider dependencies in other blocks: */
         if (src->block != instr->block)
            continue;

         /* we can end up with unused false-deps.. just skip them: */
         if (src->flags & IR3_INSTR_UNUSED)
            continue;

         struct ir3_postsched_node *sn = src->data;

         dag_add_edge(&sn->dag, &n->dag, NULL);
      }
   }

   // TODO do we want to do this after reverse-dependencies?
   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
}

static void
sched_dag_destroy(struct ir3_postsched_ctx *ctx)
{
   ralloc_free(ctx->mem_ctx);
   ctx->mem_ctx = NULL;
   ctx->dag = NULL;
}

static void
sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;
   ctx->scheduled = NULL;
   ctx->pred = NULL;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting).
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   // TODO once we are using post-sched for everything we can
   // just not stick in NOP's prior to post-sched, and drop this.
   // for now keep this, since it makes post-sched optional:
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      switch (instr->opc) {
      case OPC_NOP:
      case OPC_BR:
      case OPC_JUMP:
         list_delinit(&instr->node);
         break;
      default:
         break;
      }
   }

   sched_dag_init(ctx);

   /* First schedule all meta:input instructions, followed by
    * tex-prefetch.  We want all of the instructions that load
    * values into registers before the shader starts to go
    * before any other instructions.  But in particular we
    * want inputs to come before prefetches.  This is because
    * a FS's bary_ij input may not actually be live in the
    * shader, but it should not be scheduled on top of any
    * other input (but can be overwritten by a tex prefetch)
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_instruction *instr;

      instr = choose_instr(ctx);

      /* this shouldn't happen: */
      if (!instr) {
         ctx->error = true;
         break;
      }

      unsigned delay = ir3_delay_calc(ctx->block, instr, false, false);
      d("delay=%u", delay);

      /* and if we run out of instructions that can be scheduled,
       * then it is time for nop's:
       */
      debug_assert(delay <= 6);
      while (delay > 0) {
         ir3_NOP(block);
         delay--;
      }

      schedule(ctx, instr);
   }

   sched_dag_destroy(ctx);
}

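/* Detect a same-type mov whose src and dst are the same register (with no
 * relative addressing or src modifiers), ie. a copy that does nothing:
 */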
static bool
is_self_mov(struct ir3_instruction *instr)
{
   if (!is_same_type_mov(instr))
      return false;

   if (instr->regs[0]->num != instr->regs[1]->num)
      return false;

   if (instr->regs[0]->flags & IR3_REG_RELATIV)
      return false;

   if (instr->regs[1]->flags & (IR3_REG_CONST | IR3_REG_IMMED |
         IR3_REG_RELATIV | IR3_REG_FNEG | IR3_REG_FABS |
         IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT |
         IR3_REG_EVEN | IR3_REG_POS_INF))
      return false;

   return true;
}

/* Sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y,
 * in places where before RA we were not sure it was safe to eliminate
 * them.  We could eliminate these earlier, but sometimes they are
 * tangled up in false-dep's, etc, so it is easier just to let them
 * exist until after RA.
 */
static void
cleanup_self_movs(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         struct ir3_register *reg;

         foreach_src (reg, instr) {
            if (!reg->instr)
               continue;

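            /* The mov is a no-op, so bypass it: point our src at the
             * instruction feeding the mov, and drop the mov from the
             * block's instruction list:
             */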
            if (is_self_mov(reg->instr)) {
               list_delinit(&reg->instr->node);
               reg->instr = reg->instr->regs[1]->instr;
            }
         }

         for (unsigned i = 0; i < instr->deps_count; i++) {
            if (is_self_mov(instr->deps[i])) {
               list_delinit(&instr->deps[i]->node);
               instr->deps[i] = instr->deps[i]->regs[1]->instr;
            }
         }
      }
   }
}

int
ir3_postsched(struct ir3_context *cctx)
{
   struct ir3_postsched_ctx ctx = {
      .ctx = cctx,
   };

   ir3_remove_nops(cctx->ir);
   cleanup_self_movs(cctx->ir);

   foreach_block (block, &cctx->ir->block_list) {
      sched_block(&ctx, block);
   }

   if (ctx.error)
      return -1;

   return 0;
}