r600g/sb: fix kcache handling on r6xx
[mesa.git] / src / gallium / drivers / r600 / sb / sb_gcm.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define GCM_DEBUG 0

#if GCM_DEBUG
#define GCM_DUMP(a) do { a } while(0);
#else
#define GCM_DUMP(a)
#endif

#include <iostream>
#include <map>

#include "sb_bc.h"
#include "sb_shader.h"

#include "sb_pass.h"

namespace r600_sb {

using std::cerr;

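// Global code motion entry point. The pass works in two phases over the
// same instruction set: an early top-down pass (collect_instructions +
// init_def_count + sched_early) that moves every op into `pending` and
// schedules it as soon as all of its defs are placed, and a late bottom-up
// pass (collect_instructions + init_use_count + sched_late) that sinks each
// op towards its uses and packs the result into CF/ALU/TEX/VTX clauses.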
int gcm::run() {

    GCM_DUMP( cerr << "==== GCM ==== \n"; sh.dump_ir(); );

    collect_instructions(sh.root, true);

    init_def_count(uses, pending);

    for (node_iterator N, I = pending.begin(), E = pending.end();
            I != E; I = N) {
        N = I;
        ++N;
        node *o = *I;

        GCM_DUMP(
            cerr << "pending : ";
            dump::dump_op(o);
            cerr << "\n";
        );

        if (td_is_ready(o)) {

            GCM_DUMP(
                cerr << " ready: ";
                dump::dump_op(o);
                cerr << "\n";
            );
            pending.remove_node(o);
            ready.push_back(o);
        } else {
        }
    }

    sched_early(sh.root);

    if (!pending.empty()) {
        cerr << "##### gcm_sched_early_pass: unscheduled ops:\n";
        dump::dump_op(pending.front());
    }

    assert(pending.empty());

    GCM_DUMP( sh.dump_ir(); );

    GCM_DUMP( cerr << "\n\n ############## gcm late\n\n"; );

    collect_instructions(sh.root, false);

    init_use_count(uses, pending);

    sched_late(sh.root);
    if (!pending.empty()) {
        cerr << "##### gcm_sched_late_pass: unscheduled ops:\n";
        dump::dump_op(pending.front());
    }

    assert(ucs_level == 0);
    assert(pending.empty());

    return 0;
}


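// Detach all ops from their basic blocks into the `pending` list so the
// scheduler can place them again. On the early pass, ops flagged with
// NF_DONT_MOVE are pinned: their top_bb/bottom_bb are fixed to the block
// they currently live in.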
void gcm::collect_instructions(container_node *c, bool early_pass) {
    if (c->is_bb()) {

        if (early_pass) {
            for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                node *n = *I;
                if (n->flags & NF_DONT_MOVE) {
                    op_info &o = op_map[n];
                    o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
                }
            }
        }

        pending.append_from(c);
        return;
    }

    for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
        if (I->is_container()) {
            collect_instructions(static_cast<container_node*>(*I), early_pass);
        }
    }
}

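// Top-down walk of the region tree. For a region, loop phis are processed
// before the body and normal phis after it; releasing a phi's destinations
// may make its pending users ready. Each basic block is then filled by
// td_sched_bb() from the `ready` queue.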
void gcm::sched_early(container_node *n) {

    region_node *r =
            (n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;

    if (r && r->loop_phi) {
        sched_early(r->loop_phi);
    }

    for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
        if (I->type == NT_OP) {
            node *op = *I;
            if (op->subtype == NST_PHI) {
                td_release_uses(op->dst);
            }
        } else if (I->is_container()) {
            if (I->subtype == NST_BB) {
                bb_node* bb = static_cast<bb_node*>(*I);
                td_sched_bb(bb);
            } else {
                sched_early(static_cast<container_node*>(*I));
            }
        }
    }

    if (r && r->phi) {
        sched_early(r->phi);
    }
}

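// Append a ready op to the current block, record the block as the op's
// top_bb (its earliest placement), and release its destination values so
// that consumers waiting in `pending` can become ready. td_sched_bb()
// keeps draining the `ready` queue until no more ops can be placed in
// this block.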
void gcm::td_schedule(bb_node *bb, node *n) {
    GCM_DUMP(
        cerr << "scheduling : ";
        dump::dump_op(n);
        cerr << "\n";
    );
    td_release_uses(n->dst);

    bb->push_back(n);

    op_map[n].top_bb = bb;

}

void gcm::td_sched_bb(bb_node* bb) {
    GCM_DUMP(
        cerr << "td scheduling BB_" << bb->id << "\n";
    );

    while (!ready.empty()) {
        for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
                I = N) {
            N = I; ++N;
            td_schedule(bb, *I);
            ready.erase(I);
        }
    }
}

bool gcm::td_is_ready(node* n) {
    return uses[n] == 0;
}

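// A value defined by a just-scheduled op satisfies one dependency of every
// pending user: decrement each user's remaining def count and move it from
// `pending` to `ready` when the count reaches zero.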
void gcm::td_release_val(value *v) {

    GCM_DUMP(
        cerr << "td checking uses: ";
        dump::dump_val(v);
        cerr << "\n";
    );

    use_info *u = v->uses;
    while (u) {
        if (u->op->parent != &pending) {
            u = u->next;
            continue;
        }

        GCM_DUMP(
            cerr << "td used in ";
            dump::dump_op(u->op);
            cerr << "\n";
        );

        if (--uses[u->op] == 0) {
            GCM_DUMP(
                cerr << "td released : ";
                dump::dump_op(u->op);
                cerr << "\n";
            );

            pending.remove_node(u->op);
            ready.push_back(u->op);
        }
        u = u->next;
    }

}

void gcm::td_release_uses(vvec& v) {
    for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
        value *v = *I;
        if (!v)
            continue;

        if (v->is_rel())
            td_release_uses(v->mdef);
        else
            td_release_val(v);
    }
}

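// Bottom-up (late) scheduling. The region tree is walked in reverse order;
// depart/repeat nodes push a fresh use-count frame and release the phi
// operands they feed, if-conditions and loop-phi inputs are queued as
// pending defs for the enclosing block, and each basic block is scheduled
// by bu_sched_bb().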
void gcm::sched_late(container_node *n) {

    bool stack_pushed = false;

    if (n->is_depart()) {
        depart_node *d = static_cast<depart_node*>(n);
        push_uc_stack();
        stack_pushed = true;
        bu_release_phi_defs(d->target->phi, d->dep_id);
    } else if (n->is_repeat()) {
        repeat_node *r = static_cast<repeat_node*>(n);
        assert(r->target->loop_phi);
        push_uc_stack();
        stack_pushed = true;
        bu_release_phi_defs(r->target->loop_phi, r->rep_id);
    }

    for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
        if (I->is_container()) {
            if (I->subtype == NST_BB) {
                bb_node* bb = static_cast<bb_node*>(*I);
                bu_sched_bb(bb);
            } else {
                sched_late(static_cast<container_node*>(*I));
            }
        }
    }

    if (n->type == NT_IF) {
        if_node *f = static_cast<if_node*>(n);
        if (f->cond)
            pending_defs.push_back(f->cond);
    } else if (n->type == NT_REGION) {
        region_node *r = static_cast<region_node*>(n);
        if (r->loop_phi)
            bu_release_phi_defs(r->loop_phi, 0);
    }

    if (stack_pushed)
        pop_uc_stack();

}

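// Schedule one basic block bottom-up. Pending nodes and defs accumulated by
// the region walk are released first, then ops whose best block is this one
// are pulled in from `ready_above`. The main loop picks ops from the
// per-queue ready lists (CF/ALU/TEX/VTX), creating a new clause whenever
// the instruction type changes and limiting clause size and register
// pressure with the heuristics below (max_fetch, rp_threshold,
// check_alu_ready_count).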
void gcm::bu_sched_bb(bb_node* bb) {
    GCM_DUMP(
        cerr << "bu scheduling BB_" << bb->id << "\n";
    );

    bu_bb = bb;

    if (!pending_nodes.empty()) {
        GCM_DUMP(
            cerr << "pending nodes:\n";
        );

        // TODO consider sorting the exports by array_base,
        // possibly it can improve performance

        for (node_list::iterator I = pending_nodes.begin(),
                E = pending_nodes.end(); I != E; ++I) {
            bu_release_op(*I);
        }
        pending_nodes.clear();
        GCM_DUMP(
            cerr << "pending nodes processed...\n";
        );
    }


    if (!pending_defs.empty()) {
        for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
                I != E; ++I) {
            bu_release_val(*I);
        }
        pending_defs.clear();
    }

    for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
            I != E; I = N) {
        N = I;
        ++N;
        node *n = *I;
        if (op_map[n].bottom_bb == bb) {
            add_ready(*I);
            ready_above.erase(I);
        }
    }

    unsigned cnt_ready[SQ_NUM];

    container_node *clause = NULL;
    unsigned last_inst_type = ~0;
    unsigned last_count = 0;

    bool s = true;
    while (s) {
        node *n;

        s = false;

        unsigned ready_mask = 0;

        for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
            if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
                ready_mask |= (1 << sq);
        }

        if (!ready_mask) {
            for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
                if (!bu_ready_early[sq].empty()) {
                    node *n = bu_ready_early[sq].front();
                    bu_ready_early[sq].pop_front();
                    bu_ready[sq].push_back(n);
                    break;
                }
            }
        }

        for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {

            if (!bu_ready_next[sq].empty())
                bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);

            cnt_ready[sq] = bu_ready[sq].size();

            if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
                    cnt_ready[sq] < ctx.max_fetch/2 &&
                    !bu_ready_next[SQ_ALU].empty()) {
                sq = SQ_ALU;
                --sq;
                continue;
            }

            while (!bu_ready[sq].empty()) {

                if (last_inst_type != sq) {
                    clause = NULL;
                    last_count = 0;
                    last_inst_type = sq;
                }

                // simple heuristic to limit register pressure,
                if (sq == SQ_ALU && live_count > rp_threshold &&
                        (!bu_ready[SQ_TEX].empty() ||
                         !bu_ready[SQ_VTX].empty() ||
                         !bu_ready_next[SQ_TEX].empty() ||
                         !bu_ready_next[SQ_VTX].empty())) {
                    GCM_DUMP( cerr << "switching to fetch (regpressure)\n"; );
                    break;
                }

                n = bu_ready[sq].front();

                // real count (e.g. SAMPLE_G will be expanded to 3 instructions:
                // 2 SET_GRAD_ + 1 SAMPLE_G)
                unsigned ncnt = 1;
                if (n->is_fetch_inst() && n->src.size() == 12) {
                    ncnt = 3;
                }

                if ((sq == SQ_TEX || sq == SQ_VTX) &&
                        ((last_count >= ctx.max_fetch/2 &&
                          check_alu_ready_count(24)) ||
                         last_count + ncnt > ctx.max_fetch))
                    break;
                else if (sq == SQ_CF && last_count > 4 &&
                        check_alu_ready_count(24))
                    break;

                bu_ready[sq].pop_front();

                if (sq != SQ_CF) {
                    if (!clause) {
                        clause = sh.create_clause(sq == SQ_ALU ?
                                NST_ALU_CLAUSE :
                                sq == SQ_TEX ? NST_TEX_CLAUSE :
                                NST_VTX_CLAUSE);
                        bb->push_front(clause);
                    }
                } else {
                    clause = bb;
                }

                bu_schedule(clause, n);
                s = true;
                last_count += ncnt;
            }
        }
    }

    bu_bb = NULL;

    GCM_DUMP(
        cerr << "bu finished scheduling BB_" << bb->id << "\n";
    );
}

void gcm::bu_release_defs(vvec& v, bool src) {
    for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
        value *v = *I;
        if (!v || v->is_readonly())
            continue;

        if (v->is_rel()) {
            if (!v->rel->is_readonly())
                bu_release_val(v->rel);
            bu_release_defs(v->muse, true);
        } else if (src)
            bu_release_val(v);
        else {
            if (live.remove_val(v)) {
                --live_count;
            }
        }
    }
}

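// The use-count stack tracks, per nesting level of depart/repeat nodes, how
// many uses of each pending op have already been scheduled on the current
// path. push_uc_stack() opens a new frame; pop_uc_stack() merges it back
// into the parent frame.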
void gcm::push_uc_stack() {
    GCM_DUMP(
        cerr << "pushing use count stack prev_level " << ucs_level
            << " new level " << (ucs_level + 1) << "\n";
    );
    ++ucs_level;
    if (ucs_level == nuc_stk.size()) {
        nuc_stk.resize(ucs_level + 1);
    }
    else {
        nuc_stk[ucs_level].clear();
    }
}

bool gcm::bu_is_ready(node* n) {
    nuc_map &cm = nuc_stk[ucs_level];
    nuc_map::iterator F = cm.find(n);
    unsigned uc = (F == cm.end() ? 0 : F->second);
    return uc == uses[n];
}

void gcm::bu_schedule(container_node* c, node* n) {
    GCM_DUMP(
        cerr << "bu scheduling : ";
        dump::dump_op(n);
        cerr << "\n";
    );

    assert(op_map[n].bottom_bb == bu_bb);

    bu_release_defs(n->src, true);
    bu_release_defs(n->dst, false);

    c->push_front(n);
}

void gcm::dump_uc_stack() {
    cerr << "##### uc_stk start ####\n";
    for (unsigned l = 0; l <= ucs_level; ++l) {
        nuc_map &m = nuc_stk[l];

        cerr << "nuc_stk[" << l << "] : @" << &m << "\n";

        for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
            cerr << " uc " << I->second << " for ";
            dump::dump_op(I->first);
            cerr << "\n";
        }
    }
    cerr << "##### uc_stk end ####\n";
}

void gcm::pop_uc_stack() {
    nuc_map &pm = nuc_stk[ucs_level];
    --ucs_level;
    nuc_map &cm = nuc_stk[ucs_level];

    GCM_DUMP(
        cerr << "merging use stack from level " << (ucs_level+1)
            << " to " << ucs_level << "\n";
    );

    for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) {
        node *n = I->first;

        GCM_DUMP(
            cerr << " " << cm[n] << " += " << I->second << " for ";
            dump::dump_op(n);
            cerr << "\n";
        );

        unsigned uc = cm[n] += I->second;

        if (n->parent == &pending && uc == uses[n]) {
            cm.erase(n);
            pending_nodes.push_back(n);
            GCM_DUMP(
                cerr << "pushed pending_node due to stack pop ";
                dump::dump_op(n);
                cerr << "\n";
            );
        }
    }
}

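// Choose the final (bottom) block for an op. Starting from the block that
// is currently being scheduled, walk back towards the op's top_bb and
// remember the block with the smallest loop nesting level, so that ops are
// hoisted out of loops when possible. Ops flagged NF_DONT_HOIST (e.g.
// generated copies) stay in the current block.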
void gcm::bu_find_best_bb(node *n, op_info &oi) {

    GCM_DUMP(
        cerr << " find best bb : ";
        dump::dump_op(n);
        cerr << "\n";
    );

    if (oi.bottom_bb)
        return;

    // don't hoist generated copies
    if (n->flags & NF_DONT_HOIST) {
        oi.bottom_bb = bu_bb;
        return;
    }

    bb_node* best_bb = bu_bb;
    bb_node* top_bb = oi.top_bb;
    assert(oi.top_bb && !oi.bottom_bb);

    node *c = best_bb;

    // FIXME top_bb may be located inside a loop, so we'll never reach it in
    // the walk below, and the instruction will be incorrectly placed at the
    // beginning of the shader.
    // For now, just check whether top_bb's loop_level is higher than that of
    // the current bb and abort the search for a better bb in that case,
    // but this problem may require a more complete (and more expensive) fix.
    if (top_bb->loop_level <= best_bb->loop_level) {
        while (c && c != top_bb) {

            if (c->prev) {
                c = c->prev;
            } else {
                c = c->parent;
                if (!c)
                    break;
                continue;
            }

            if (c->subtype == NST_BB) {
                bb_node *bb = static_cast<bb_node*>(c);
                if (bb->loop_level < best_bb->loop_level)
                    best_bb = bb;
            }
        }
    }

    oi.bottom_bb = best_bb;
}

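// Queue a released op for scheduling: ops flagged NF_SCHEDULE_EARLY go to
// the early queue (consulted only when nothing else is ready), copy movs go
// to the front of the current ALU queue, and everything else waits in the
// "next" queue for the following scheduling round.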
void gcm::add_ready(node *n) {
    sched_queue_id sq = sh.get_queue_id(n);
    if (n->flags & NF_SCHEDULE_EARLY)
        bu_ready_early[sq].push_back(n);
    else if (sq == SQ_ALU && n->is_copy_mov())
        bu_ready[sq].push_front(n);
    else
        bu_ready_next[sq].push_back(n);
}

void gcm::bu_release_op(node * n) {
    op_info &oi = op_map[n];

    GCM_DUMP(
        cerr << " bu release op ";
        dump::dump_op(n);
    );

    nuc_stk[ucs_level].erase(n);
    pending.remove_node(n);

    bu_find_best_bb(n, oi);

    if (oi.bottom_bb == bu_bb) {
        GCM_DUMP( cerr << " ready\n";);
        add_ready(n);
    } else {
        GCM_DUMP( cerr << " ready_above\n";);
        ready_above.push_back(n);
    }
}

void gcm::bu_release_phi_defs(container_node* p, unsigned op)
{
    for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
        node *o = *I;
        value *v = o->src[op];
        if (v && !v->is_readonly())
            pending_defs.push_back(o->src[op]);

    }
}

unsigned gcm::get_uc_vec(vvec &vv) {
    unsigned c = 0;
    for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
        value *v = *I;
        if (!v)
            continue;

        if (v->is_rel())
            c += get_uc_vec(v->mdef);
        else
            c += v->use_count();
    }
    return c;
}

void gcm::init_use_count(nuc_map& m, container_node &s) {
    m.clear();
    for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
        node *n = *I;
        unsigned uc = get_uc_vec(n->dst);
        GCM_DUMP(
            cerr << "uc " << uc << " ";
            dump::dump_op(n);
            cerr << "\n";
        );
        if (!uc) {
            pending_nodes.push_back(n);
            GCM_DUMP(
                cerr << "pushed pending_node in init ";
                dump::dump_op(n);
                cerr << "\n";
            );

        } else
            m[n] = uc;
    }
}

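// Called when a use of `v` has been scheduled. Bump the scheduled-use count
// of the defining op in the current stack frame and track liveness; once
// every use has been scheduled (the count reaches uses[n]) the op itself is
// released for scheduling.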
void gcm::bu_release_val(value* v) {
    node *n = v->any_def();

    if (n && n->parent == &pending) {
        nuc_map &m = nuc_stk[ucs_level];
        unsigned uc = ++m[n];
        unsigned uc2 = uses[n];

        if (live.add_val(v)) {
            ++live_count;
            GCM_DUMP ( cerr << "live_count: " << live_count << "\n"; );
        }

        GCM_DUMP(
            cerr << "release val ";
            dump::dump_val(v);
            cerr << " for node ";
            dump::dump_op(n);
            cerr << " new uc=" << uc << ", total " << uc2 << "\n";
        );

        if (uc == uc2)
            bu_release_op(n);
    }

}

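// For the early pass: the readiness counter of an op is the number of
// defining instructions feeding its source and relative-address operands,
// i.e. how many defs must be scheduled before the op itself can be placed.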
void gcm::init_def_count(nuc_map& m, container_node& s) {
    m.clear();
    for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
        node *n = *I;
        unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
        m[n] = dc;

        GCM_DUMP(
            cerr << "dc " << dc << " ";
            dump::dump_op(n);
            cerr << "\n";
        );
    }
}

unsigned gcm::get_dc_vec(vvec& vv, bool src) {
    unsigned c = 0;
    for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
        value *v = *I;
        if (!v || v->is_readonly())
            continue;

        if (v->is_rel()) {
            c += v->rel->def != NULL;
            c += get_dc_vec(v->muse, true);
        }
        else if (src) {
            c += v->def != NULL;
            c += v->adef != NULL;
        }
    }
    return c;
}

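// Count "real" ALU instructions in a ready queue, ignoring trivial copy
// movs from GPRs and expanding packed ALU groups to their member count.
// check_alu_ready_count() uses this to tell whether at least `threshold`
// ALU instructions are ready, which gates switching away from fetch/CF
// clauses in bu_sched_bb().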
unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
    sq_iterator I(q.begin()), E(q.end());
    unsigned c = 0;

    while (I != E && c < max) {
        node *n = *I;
        if (n->is_alu_inst()) {
            if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
                ++c;
        } else if (n->is_alu_packed()) {
            c += static_cast<container_node*>(n)->count();
        }
        ++I;
    }

    return c;
}

bool gcm::check_alu_ready_count(unsigned threshold) {
    unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
    if (r >= threshold)
        return true;
    r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
    return r >= threshold;
}

} // namespace r600_sb