// mesa.git: src/gallium/drivers/r600/sb/sb_gcm.cpp
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define GCM_DEBUG 0

#if GCM_DEBUG
#define GCM_DUMP(a) do { a } while (0)
#else
#define GCM_DUMP(a)
#endif

#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

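// Global Code Motion (GCM) scheduling pass, roughly following the usual
// global code motion approach:
//   * an "early", top-down phase (sched_early/td_*): movable ops are collected
//     into `pending`, an op becomes ready once all ops defining its source
//     values have been placed, and it is appended to the earliest basic block
//     where its inputs are available (recorded as op_info::top_bb);
//   * a "late", bottom-up phase (sched_late/bu_*): blocks are visited in
//     reverse, an op becomes ready once all uses of its results have been
//     placed, and it sinks to the block chosen by bu_find_best_bb between
//     top_bb and the current block (preferring the lowest loop nesting level),
//     where it is also packed into ALU/TEX/VTX clauses.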
int gcm::run() {

	GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );

	collect_instructions(sh.root, true);

	init_def_count(uses, pending);

	for (node_iterator N, I = pending.begin(), E = pending.end();
			I != E; I = N) {
		N = I;
		++N;
		node *o = *I;

		GCM_DUMP(
			sblog << "pending : ";
			dump::dump_op(o);
			sblog << "\n";
		);

		if (td_is_ready(o)) {

			GCM_DUMP(
				sblog << " ready: ";
				dump::dump_op(o);
				sblog << "\n";
			);
			pending.remove_node(o);
			ready.push_back(o);
		}
	}

	sched_early(sh.root);

	if (!pending.empty()) {
		sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(pending.empty());

	GCM_DUMP( sh.dump_ir(); );

	GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );

	collect_instructions(sh.root, false);

	init_use_count(uses, pending);

	sched_late(sh.root);
	if (!pending.empty()) {
		sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(ucs_level == 0);
	assert(pending.empty());

	return 0;
}

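// Gather the contents of all basic blocks into the `pending` list for the
// current phase. On the early pass, ops marked NF_DONT_MOVE are pinned to
// their original block by setting both top_bb and bottom_bb up front.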
void gcm::collect_instructions(container_node *c, bool early_pass) {
	if (c->is_bb()) {

		if (early_pass) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				node *n = *I;
				if (n->flags & NF_DONT_MOVE) {
					op_info &o = op_map[n];
					o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
				}
			}
		}

		pending.append_from(c);
		return;
	}

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		if (I->is_container()) {
			collect_instructions(static_cast<container_node*>(*I), early_pass);
		}
	}
}

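// Top-down walk for the early phase: for a region, loop phis are handled
// first, then the body, then the regular phis; phi results are released
// immediately (the phi nodes themselves are not moved), and each basic block
// is filled from the `ready` queue by td_sched_bb.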
void gcm::sched_early(container_node *n) {

	region_node *r =
			(n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;

	if (r && r->loop_phi) {
		sched_early(r->loop_phi);
	}

	for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
		if (I->type == NT_OP) {
			node *op = *I;
			if (op->subtype == NST_PHI) {
				td_release_uses(op->dst);
			}
		} else if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				td_sched_bb(bb);
			} else {
				sched_early(static_cast<container_node*>(*I));
			}
		}
	}

	if (r && r->phi) {
		sched_early(r->phi);
	}
}

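// Early-phase helpers: td_schedule appends a ready op to the current block and
// releases the uses of its results; td_sched_bb drains the `ready` queue into
// a block until no more ops become ready; td_release_val decrements the
// remaining-definition counter of every pending consumer of a value and moves
// consumers that reach zero from `pending` to `ready`.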
void gcm::td_schedule(bb_node *bb, node *n) {
	GCM_DUMP(
		sblog << "scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);
	td_release_uses(n->dst);

	bb->push_back(n);

	op_map[n].top_bb = bb;
}

void gcm::td_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "td scheduling BB_" << bb->id << "\n";
	);

	while (!ready.empty()) {
		for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			td_schedule(bb, *I);
			ready.erase(I);
		}
	}
}

bool gcm::td_is_ready(node* n) {
	return uses[n] == 0;
}

void gcm::td_release_val(value *v) {

	GCM_DUMP(
		sblog << "td checking uses: ";
		dump::dump_val(v);
		sblog << "\n";
	);

	use_info *u = v->uses;
	while (u) {
		if (u->op->parent != &pending) {
			u = u->next;
			continue;
		}

		GCM_DUMP(
			sblog << "td used in ";
			dump::dump_op(u->op);
			sblog << "\n";
		);

		if (--uses[u->op] == 0) {
			GCM_DUMP(
				sblog << "td released : ";
				dump::dump_op(u->op);
				sblog << "\n";
			);

			pending.remove_node(u->op);
			ready.push_back(u->op);
		}
		u = u->next;
	}
}

void gcm::td_release_uses(vvec& v) {
	for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
		value *val = *I;
		if (!val)
			continue;

		if (val->is_rel())
			td_release_uses(val->mdef);
		else
			td_release_val(val);
	}
}

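// Bottom-up walk for the late phase. Children are visited in reverse order;
// depart/repeat nodes push a fresh use-count level and release the phi
// operands belonging to their edge (dep_id/rep_id); IF conditions and loop phi
// inputs are queued in pending_defs so they are released when the enclosing
// block is scheduled.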
void gcm::sched_late(container_node *n) {

	bool stack_pushed = false;

	if (n->is_depart()) {
		depart_node *d = static_cast<depart_node*>(n);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(d->target->phi, d->dep_id);
	} else if (n->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(n);
		assert(r->target->loop_phi);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(r->target->loop_phi, r->rep_id);
	}

	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
		if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				bu_sched_bb(bb);
			} else {
				sched_late(static_cast<container_node*>(*I));
			}
		}
	}

	if (n->type == NT_IF) {
		if_node *f = static_cast<if_node*>(n);
		if (f->cond)
			pending_defs.push_back(f->cond);
	} else if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->loop_phi)
			bu_release_phi_defs(r->loop_phi, 0);
	}

	if (stack_pushed)
		pop_uc_stack();
}

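// Late-phase scheduling of a single basic block, built bottom-up (ops and
// clauses are pushed to the front of the block). Ready ops are kept in
// per-queue lists: bu_ready (schedulable now), bu_ready_next (deferred to the
// next round), bu_ready_early (NF_SCHEDULE_EARLY ops, used only when nothing
// else is ready), and ready_above (ops whose best block is further up the
// CFG). Non-CF ops are wrapped in ALU/TEX/VTX clauses; clause size limits
// (ctx.max_fetch, CF batching) and a live-value threshold (rp_threshold)
// balance clause length against register pressure.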
void gcm::bu_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "bu scheduling BB_" << bb->id << "\n";
	);

	bu_bb = bb;

	if (!pending_nodes.empty()) {
		GCM_DUMP(
			sblog << "pending nodes:\n";
		);

		// TODO: consider sorting the exports by array_base;
		// it may improve performance

		for (node_list::iterator I = pending_nodes.begin(),
				E = pending_nodes.end(); I != E; ++I) {
			bu_release_op(*I);
		}
		pending_nodes.clear();
		GCM_DUMP(
			sblog << "pending nodes processed...\n";
		);
	}

	if (!pending_defs.empty()) {
		for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
				I != E; ++I) {
			bu_release_val(*I);
		}
		pending_defs.clear();
	}

	for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
			I != E; I = N) {
		N = I;
		++N;
		node *n = *I;
		if (op_map[n].bottom_bb == bb) {
			add_ready(*I);
			ready_above.erase(I);
		}
	}

	unsigned cnt_ready[SQ_NUM];

	container_node *clause = NULL;
	unsigned last_inst_type = ~0;
	unsigned last_count = 0;

	bool s = true;
	while (s) {
		node *n;

		s = false;

		unsigned ready_mask = 0;

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
			if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
				ready_mask |= (1 << sq);
		}

		if (!ready_mask) {
			for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
				if (!bu_ready_early[sq].empty()) {
					node *n = bu_ready_early[sq].front();
					bu_ready_early[sq].pop_front();
					bu_ready[sq].push_back(n);
					break;
				}
			}
		}

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {

			// if a PRED_SET that updates the exec mask is still pending,
			// skip the CF queue for this round and process the ALU queue first
			if (sq == SQ_CF && pending_exec_mask_update) {
				pending_exec_mask_update = false;
				sq = SQ_ALU;
				--sq;
				continue;
			}

			if (!bu_ready_next[sq].empty())
				bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);

			cnt_ready[sq] = bu_ready[sq].size();

			if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
					cnt_ready[sq] < ctx.max_fetch/2 &&
					!bu_ready_next[SQ_ALU].empty()) {
				sq = SQ_ALU;
				--sq;
				continue;
			}

			while (!bu_ready[sq].empty()) {

				if (last_inst_type != sq) {
					clause = NULL;
					last_count = 0;
					last_inst_type = sq;
				}

				// simple heuristic to limit register pressure: while too many
				// values are live, prefer ready fetch instructions over ALU
				if (sq == SQ_ALU && live_count > rp_threshold &&
						(!bu_ready[SQ_TEX].empty() ||
						!bu_ready[SQ_VTX].empty() ||
						!bu_ready_next[SQ_TEX].empty() ||
						!bu_ready_next[SQ_VTX].empty())) {
					GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
					break;
				}

				n = bu_ready[sq].front();

				// real count (e.g. SAMPLE_G will be expanded to 3 instructions,
				// 2 SET_GRAD_ + 1 SAMPLE_G)
				unsigned ncnt = 1;
				if (n->is_fetch_inst() && n->src.size() == 12) {
					ncnt = 3;
				}

				if ((sq == SQ_TEX || sq == SQ_VTX) &&
						((last_count >= ctx.max_fetch/2 &&
						check_alu_ready_count(24)) ||
						last_count + ncnt > ctx.max_fetch))
					break;
				else if (sq == SQ_CF && last_count > 4 &&
						check_alu_ready_count(24))
					break;

				bu_ready[sq].pop_front();

				if (sq != SQ_CF) {
					if (!clause) {
						clause = sh.create_clause(sq == SQ_ALU ?
								NST_ALU_CLAUSE :
								sq == SQ_TEX ? NST_TEX_CLAUSE :
										NST_VTX_CLAUSE);
						bb->push_front(clause);
					}
				} else {
					clause = bb;
				}

				bu_schedule(clause, n);
				s = true;
				last_count += ncnt;
			}
		}
	}

	bu_bb = NULL;

	GCM_DUMP(
		sblog << "bu finished scheduling BB_" << bb->id << "\n";
	);
}

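// Walk an operand vector after an op has been scheduled: for sources, release
// the values (which may make their defining ops ready); for destinations,
// remove the values from the live set so live_count tracks the current
// register pressure estimate.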
void gcm::bu_release_defs(vvec& v, bool src) {
	for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
		value *val = *I;
		if (!val || val->is_readonly())
			continue;

		if (val->is_rel()) {
			if (!val->rel->is_readonly())
				bu_release_val(val->rel);
			bu_release_defs(val->muse, true);
		} else if (src)
			bu_release_val(val);
		else {
			if (live.remove_val(val)) {
				--live_count;
			}
		}
	}
}

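// Use counts for the late phase are kept on a small stack of maps (nuc_stk),
// one level per nested depart/repeat path, so uses seen on divergent paths are
// only merged into the parent level when the path is popped; an op is ready
// once the accumulated count equals its total use count from init_use_count.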
void gcm::push_uc_stack() {
	GCM_DUMP(
		sblog << "pushing use count stack prev_level " << ucs_level
			<< " new level " << (ucs_level + 1) << "\n";
	);
	++ucs_level;
	if (ucs_level == nuc_stk.size()) {
		nuc_stk.resize(ucs_level + 1);
	} else {
		nuc_stk[ucs_level].clear();
	}
}

bool gcm::bu_is_ready(node* n) {
	nuc_map &cm = nuc_stk[ucs_level];
	nuc_map::iterator F = cm.find(n);
	unsigned uc = (F == cm.end() ? 0 : F->second);
	return uc == uses[n];
}

void gcm::bu_schedule(container_node* c, node* n) {
	GCM_DUMP(
		sblog << "bu scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	assert(op_map[n].bottom_bb == bu_bb);

	bu_release_defs(n->src, true);
	bu_release_defs(n->dst, false);

	c->push_front(n);
}

void gcm::dump_uc_stack() {
	sblog << "##### uc_stk start ####\n";
	for (unsigned l = 0; l <= ucs_level; ++l) {
		nuc_map &m = nuc_stk[l];

		sblog << "nuc_stk[" << l << "] : @" << &m << "\n";

		for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
			sblog << " uc " << I->second << " for ";
			dump::dump_op(I->first);
			sblog << "\n";
		}
	}
	sblog << "##### uc_stk end ####\n";
}

void gcm::pop_uc_stack() {
	nuc_map &pm = nuc_stk[ucs_level];
	--ucs_level;
	nuc_map &cm = nuc_stk[ucs_level];

	GCM_DUMP(
		sblog << "merging use stack from level " << (ucs_level+1)
			<< " to " << ucs_level << "\n";
	);

	for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) {
		node *n = I->first;

		GCM_DUMP(
			sblog << " " << cm[n] << " += " << I->second << " for ";
			dump::dump_op(n);
			sblog << "\n";
		);

		unsigned uc = cm[n] += I->second;

		if (n->parent == &pending && uc == uses[n]) {
			cm.erase(n);
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node due to stack pop ";
				dump::dump_op(n);
				sblog << "\n";
			);
		}
	}
}

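// Choose the block where an op will finally be placed: starting from the
// current block and walking back towards its top_bb, pick the block with the
// lowest loop nesting level, so ops are hoisted out of loops when their inputs
// allow it (see the FIXME below for a known limitation).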
void gcm::bu_find_best_bb(node *n, op_info &oi) {

	GCM_DUMP(
		sblog << " find best bb : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	if (oi.bottom_bb)
		return;

	// don't hoist generated copies
	if (n->flags & NF_DONT_HOIST) {
		oi.bottom_bb = bu_bb;
		return;
	}

	bb_node* best_bb = bu_bb;
	bb_node* top_bb = oi.top_bb;
	assert(oi.top_bb && !oi.bottom_bb);

	node *c = best_bb;

	// FIXME: top_bb may be located inside a loop, so we'll never enter it
	// in the loop below, and the instruction will be incorrectly placed at the
	// beginning of the shader.
	// For now, just check if top_bb's loop_level is higher than the current
	// bb's and abort the search for a better bb in that case, but this problem
	// may require a more complete (and more expensive) fix.
	if (top_bb->loop_level <= best_bb->loop_level) {
		while (c && c != top_bb) {

			if (c->prev) {
				c = c->prev;
			} else {
				c = c->parent;
				if (!c)
					break;
				continue;
			}

			if (c->subtype == NST_BB) {
				bb_node *bb = static_cast<bb_node*>(c);
				if (bb->loop_level < best_bb->loop_level)
					best_bb = bb;
			}
		}
	}

	oi.bottom_bb = best_bb;
}

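// Put a released op on the appropriate ready queue: NF_SCHEDULE_EARLY ops wait
// in bu_ready_early, copy MOVs jump to the front of the ALU queue, and
// everything else goes to bu_ready_next for the following round; a PRED_SET
// that writes the exec mask also sets pending_exec_mask_update, which makes
// bu_sched_bb handle the ALU queue before taking more CF ops in that round.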
void gcm::add_ready(node *n) {
	sched_queue_id sq = sh.get_queue_id(n);
	if (n->flags & NF_SCHEDULE_EARLY)
		bu_ready_early[sq].push_back(n);
	else if (sq == SQ_ALU && n->is_copy_mov())
		bu_ready[sq].push_front(n);
	else if (n->is_alu_inst()) {
		alu_node *a = static_cast<alu_node*>(n);
		if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
			// PRED_SET instruction that updates exec mask
			pending_exec_mask_update = true;
		}
		bu_ready_next[sq].push_back(n);
	} else
		bu_ready_next[sq].push_back(n);
}

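// All uses of this op's results have been scheduled: take it out of `pending`,
// decide its final block, and either make it ready in the current block or
// park it in ready_above until scheduling reaches its bottom_bb.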
void gcm::bu_release_op(node * n) {
	op_info &oi = op_map[n];

	GCM_DUMP(
		sblog << " bu release op ";
		dump::dump_op(n);
	);

	nuc_stk[ucs_level].erase(n);
	pending.remove_node(n);

	bu_find_best_bb(n, oi);

	if (oi.bottom_bb == bu_bb) {
		GCM_DUMP( sblog << " ready\n";);
		add_ready(n);
	} else {
		GCM_DUMP( sblog << " ready_above\n";);
		ready_above.push_back(n);
	}
}

void gcm::bu_release_phi_defs(container_node* p, unsigned op)
{
	for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
		node *o = *I;
		value *v = o->src[op];
		if (v && !v->is_readonly())
			pending_defs.push_back(o->src[op]);
	}
}

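// get_uc_vec sums the use counts of all destination values of an op (relative
// destinations count the uses of their mdef values instead); init_use_count
// records that total per op for the late phase, and ops with no uses at all
// (e.g. exports and other side-effect-only ops) go straight to pending_nodes.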
unsigned gcm::get_uc_vec(vvec &vv) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			c += get_uc_vec(v->mdef);
		else
			c += v->use_count();
	}
	return c;
}

void gcm::init_use_count(nuc_map& m, container_node &s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned uc = get_uc_vec(n->dst);
		GCM_DUMP(
			sblog << "uc " << uc << " ";
			dump::dump_op(n);
			sblog << "\n";
		);
		if (!uc) {
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node in init ";
				dump::dump_op(n);
				sblog << "\n";
			);
		} else
			m[n] = uc;
	}
}

void gcm::bu_release_val(value* v) {
	node *n = v->any_def();

	if (n && n->parent == &pending) {
		nuc_map &m = nuc_stk[ucs_level];
		unsigned uc = ++m[n];
		unsigned uc2 = uses[n];

		if (live.add_val(v)) {
			++live_count;
			GCM_DUMP( sblog << "live_count: " << live_count << "\n"; );
		}

		GCM_DUMP(
			sblog << "release val ";
			dump::dump_val(v);
			sblog << " for node ";
			dump::dump_op(n);
			sblog << " new uc=" << uc << ", total " << uc2 << "\n";
		);

		if (uc == uc2)
			bu_release_op(n);
	}
}

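// For the early phase, each op's counter is the number of definitions it
// waits for: get_dc_vec counts the definitions reaching its source values and
// the values used by relative addressing on either side, so td_is_ready simply
// checks that this counter has dropped to zero.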
void gcm::init_def_count(nuc_map& m, container_node& s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
		m[n] = dc;

		GCM_DUMP(
			sblog << "dc " << dc << " ";
			dump::dump_op(n);
			sblog << "\n";
		);
	}
}

unsigned gcm::get_dc_vec(vvec& vv, bool src) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			c += v->rel->def != NULL;
			c += get_dc_vec(v->muse, true);
		} else if (src) {
			c += v->def != NULL;
			c += v->adef != NULL;
		}
	}
	return c;
}

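// Estimate how many ALU instructions are really waiting in a queue, capped at
// `max`: copy MOVs from a GPR are skipped (presumably because they are cheap
// and likely to be coalesced later), and packed ALU nodes count as the number
// of sub-instructions they contain; check_alu_ready_count applies this to
// bu_ready and bu_ready_next to decide when enough ALU work has piled up to
// close a fetch or CF run.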
unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
	sq_iterator I(q.begin()), E(q.end());
	unsigned c = 0;

	while (I != E && c < max) {
		node *n = *I;
		if (n->is_alu_inst()) {
			if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
				++c;
		} else if (n->is_alu_packed()) {
			c += static_cast<container_node*>(n)->count();
		}
		++I;
	}

	return c;
}

bool gcm::check_alu_ready_count(unsigned threshold) {
	unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
	if (r >= threshold)
		return true;
	r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
	return r >= threshold;
}

} // namespace r600_sb