 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Debug-dump helper: wraps its statement list in do { } while(0) so that a
// GCM_DUMP(...) invocation behaves as exactly one statement.
// Fixes: dropped the extraction artifact (fused original line number) that
// made the #define invalid, and removed the trailing ';' after while(0) —
// with it, "if (x) GCM_DUMP(...); else ..." fails to parse because the
// extra ';' terminates the if-statement before the else.
#define GCM_DUMP(a) do { a } while(0)
38 #include "sb_shader.h"
40 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE
// [fragment] gcm driver (the enclosing function header is elided in this
// extraction; presumably gcm::run — TODO confirm against full source).
// Visible flow: early pass (collect_instructions(..., true), init_def_count,
// iterate/drain `pending`), assert pending empty, then late pass
// (collect_instructions(..., false), init_use_count), assert ucs_level == 0
// and pending empty again.
// NOTE(review): garbled extraction — statements wrap across lines, interior
// source lines are elided, and the leading integers are original line
// numbers fused into the text. Code tokens left byte-identical.
46 GCM_DUMP( sblog
<< "==== GCM ==== \n"; sh
.dump_ir(); );
// Early (top-down) pass: gather movable ops into `pending`.
48 collect_instructions(sh
.root
, true);
50 init_def_count(uses
, pending
);
52 for (node_iterator N
, I
= pending
.begin(), E
= pending
.end();
59 sblog
<< "pending : ";
71 pending
.remove_node(o
);
// Any op still pending after the early pass is a scheduling failure.
79 if (!pending
.empty()) {
80 sblog
<< "##### gcm_sched_early_pass: unscheduled ops:\n";
81 dump::dump_op(pending
.front());
84 assert(pending
.empty());
86 GCM_DUMP( sh
.dump_ir(); );
88 GCM_DUMP( sblog
<< "\n\n ############## gcm late\n\n"; );
// Late (bottom-up) pass over the same IR, this time counting uses.
90 collect_instructions(sh
.root
, false);
92 init_use_count(uses
, pending
);
95 if (!pending
.empty()) {
96 sblog
<< "##### gcm_sched_late_pass: unscheduled ops:\n";
97 dump::dump_op(pending
.front());
100 assert(ucs_level
== 0);
101 assert(pending
.empty());
// [fragment] gcm::collect_instructions — walks the children of container c:
// ops flagged NF_DONT_MOVE are pinned (top_bb = bottom_bb = c in op_map),
// the container's nodes are appended to `pending`, and the walk recurses
// into nested containers with the same early_pass flag.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
107 void gcm::collect_instructions(container_node
*c
, bool early_pass
) {
111 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
// Pinned ops are never hoisted/sunk: both placement bounds are c itself.
113 if (n
->flags
& NF_DONT_MOVE
) {
114 op_info
&o
= op_map
[n
];
115 o
.top_bb
= o
.bottom_bb
= static_cast<bb_node
*>(c
);
120 pending
.append_from(c
);
124 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
125 if (I
->is_container()) {
126 collect_instructions(static_cast<container_node
*>(*I
), early_pass
);
// [fragment] gcm::sched_early — top-down scheduling walk. For a region node
// with a loop_phi container the phis are scheduled first; then each child is
// visited: NT_OP phis get td_release_uses(op->dst), containers are recursed
// into (NST_BB is special-cased — the elided code presumably does td
// scheduling of the bb; TODO confirm).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
131 void gcm::sched_early(container_node
*n
) {
134 (n
->type
== NT_REGION
) ? static_cast<region_node
*>(n
) : NULL
;
136 if (r
&& r
->loop_phi
) {
137 sched_early(r
->loop_phi
);
140 for (node_iterator I
= n
->begin(), E
= n
->end(); I
!= E
; ++I
) {
141 if (I
->type
== NT_OP
) {
143 if (op
->subtype
== NST_PHI
) {
144 td_release_uses(op
->dst
);
146 } else if (I
->is_container()) {
147 if (I
->subtype
== NST_BB
) {
148 bb_node
* bb
= static_cast<bb_node
*>(*I
);
151 sched_early(static_cast<container_node
*>(*I
));
// [fragment] gcm::td_schedule — places node n into bb during the top-down
// pass: releases the uses of n's destinations and records bb as the node's
// top placement bound (op_map[n].top_bb = bb).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
161 void gcm::td_schedule(bb_node
*bb
, node
*n
) {
163 sblog
<< "scheduling : ";
167 td_release_uses(n
->dst
);
171 op_map
[n
].top_bb
= bb
;
// [fragment] gcm::td_sched_bb — drains the `ready` queue for one basic
// block during the top-down pass (the loop body is elided in this
// extraction).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
175 void gcm::td_sched_bb(bb_node
* bb
) {
177 sblog
<< "td scheduling BB_" << bb
->id
<< "\n";
180 while (!ready
.empty()) {
181 for (sq_iterator N
, I
= ready
.begin(), E
= ready
.end(); I
!= E
;
// [fragment] gcm::td_is_ready — readiness predicate for the top-down pass;
// the body is elided in this extraction (presumably checks that all of n's
// inputs are already scheduled — TODO confirm against full source).
190 bool gcm::td_is_ready(node
* n
) {
// [fragment] gcm::td_release_val — for each use of value v, ops still in
// `pending` have their remaining-dependency count uses[op] decremented;
// when it reaches zero the op is removed from pending (and presumably moved
// to a ready queue in an elided line — TODO confirm).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
194 void gcm::td_release_val(value
*v
) {
197 sblog
<< "td checking uses: ";
202 for (uselist::iterator I
= v
->uses
.begin(), E
= v
->uses
.end(); I
!= E
; ++I
) {
// Ops no longer in `pending` have already been handled — skip them.
204 if (op
->parent
!= &pending
) {
209 sblog
<< "td used in ";
214 assert(uses
[op
] > 0);
215 if (--uses
[op
] == 0) {
217 sblog
<< "td released : ";
222 pending
.remove_node(op
);
// [fragment] gcm::td_release_uses — iterates a value vector and (in elided
// code) releases each value; visible tail recurses into v->mdef.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
229 void gcm::td_release_uses(vvec
& v
) {
230 for (vvec::iterator I
= v
.begin(), E
= v
.end(); I
!= E
; ++I
) {
236 td_release_uses(v
->mdef
);
// [fragment] gcm::sched_late — bottom-up scheduling walk. Depart/repeat
// nodes release the matching column (dep_id/rep_id) of their target's
// phi/loop_phi defs; children are visited in reverse; NT_IF pushes the
// condition onto pending_defs; NT_REGION releases column 0 of loop_phi.
// `stack_pushed` presumably pairs with elided push/pop of the use-count
// stack — TODO confirm against full source.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
242 void gcm::sched_late(container_node
*n
) {
244 bool stack_pushed
= false;
246 if (n
->is_depart()) {
247 depart_node
*d
= static_cast<depart_node
*>(n
);
250 bu_release_phi_defs(d
->target
->phi
, d
->dep_id
);
251 } else if (n
->is_repeat()) {
252 repeat_node
*r
= static_cast<repeat_node
*>(n
);
253 assert(r
->target
->loop_phi
);
256 bu_release_phi_defs(r
->target
->loop_phi
, r
->rep_id
);
// Bottom-up: visit children in reverse order.
259 for (node_riterator I
= n
->rbegin(), E
= n
->rend(); I
!= E
; ++I
) {
260 if (I
->is_container()) {
261 if (I
->subtype
== NST_BB
) {
262 bb_node
* bb
= static_cast<bb_node
*>(*I
);
265 sched_late(static_cast<container_node
*>(*I
));
270 if (n
->type
== NT_IF
) {
271 if_node
*f
= static_cast<if_node
*>(n
);
273 pending_defs
.push_back(f
->cond
);
274 } else if (n
->type
== NT_REGION
) {
275 region_node
*r
= static_cast<region_node
*>(n
);
277 bu_release_phi_defs(r
->loop_phi
, 0);
// [fragment] gcm::bu_sched_bb — bottom-up scheduling of one basic block.
// Visible structure: flush pending_nodes and pending_defs; pull entries from
// ready_above whose bottom_bb is this bb; then round-robin over the
// scheduling queues (SQ_CF .. SQ_NUM): promote bu_ready_early entries,
// splice bu_ready_next into bu_ready, and emit instructions into clauses
// (ALU/TEX/GDS/VTX) created via sh.create_clause and pushed to the front of
// the bb. Heuristics visible: cap fetch-clause size by ctx.max_fetch, prefer
// fetch when live_count exceeds rp_threshold, give sampler-indexed fetches
// their own clause, and break clauses around LDS-queue consumers.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
285 void gcm::bu_sched_bb(bb_node
* bb
) {
287 sblog
<< "bu scheduling BB_" << bb
->id
<< "\n";
292 if (!pending_nodes
.empty()) {
294 sblog
<< "pending nodes:\n";
297 // TODO consider sorting the exports by array_base,
298 // possibly it can improve performance
300 for (node_list::iterator I
= pending_nodes
.begin(),
301 E
= pending_nodes
.end(); I
!= E
; ++I
) {
304 pending_nodes
.clear();
306 sblog
<< "pending nodes processed...\n";
311 if (!pending_defs
.empty()) {
312 for (vvec::iterator I
= pending_defs
.begin(), E
= pending_defs
.end();
316 pending_defs
.clear();
// Claim ops hoisted from inner blocks whose best placement is this bb.
319 for (sched_queue::iterator N
, I
= ready_above
.begin(), E
= ready_above
.end();
324 if (op_map
[n
].bottom_bb
== bb
) {
326 ready_above
.erase(I
);
330 unsigned cnt_ready
[SQ_NUM
];
332 container_node
*clause
= NULL
;
333 unsigned last_inst_type
= ~0;
334 unsigned last_count
= 0;
// Bitmask of queues that still have work (ready or ready_next).
342 unsigned ready_mask
= 0;
344 for (unsigned sq
= SQ_CF
; sq
< SQ_NUM
; ++sq
) {
345 if (!bu_ready
[sq
].empty() || !bu_ready_next
[sq
].empty())
346 ready_mask
|= (1 << sq
);
// Promote "schedule early" candidates into the main ready queues.
350 for (unsigned sq
= SQ_CF
; sq
< SQ_NUM
; ++sq
) {
351 if (!bu_ready_early
[sq
].empty()) {
352 node
*n
= bu_ready_early
[sq
].front();
353 bu_ready_early
[sq
].pop_front();
354 bu_ready
[sq
].push_back(n
);
360 for (unsigned sq
= SQ_CF
; sq
< SQ_NUM
; ++sq
) {
362 if (sq
== SQ_CF
&& pending_exec_mask_update
) {
363 pending_exec_mask_update
= false;
369 if (sq
!= SQ_ALU
&& outstanding_lds_oq
)
372 if (!bu_ready_next
[sq
].empty())
373 bu_ready
[sq
].splice(bu_ready
[sq
].end(), bu_ready_next
[sq
]);
375 cnt_ready
[sq
] = bu_ready
[sq
].size();
// Defer small fetch batches while register pressure is low and ALU work
// is queued — lets fetch clauses grow closer to ctx.max_fetch/2.
377 if ((sq
== SQ_TEX
|| sq
== SQ_VTX
) && live_count
<= rp_threshold
&&
378 cnt_ready
[sq
] < ctx
.max_fetch
/2 &&
379 !bu_ready_next
[SQ_ALU
].empty()) {
385 while (!bu_ready
[sq
].empty()) {
387 if (last_inst_type
!= sq
) {
393 // simple heuristic to limit register pressure,
394 if (sq
== SQ_ALU
&& live_count
> rp_threshold
&& !outstanding_lds_oq
&&
395 (!bu_ready
[SQ_TEX
].empty() ||
396 !bu_ready
[SQ_VTX
].empty() ||
397 !bu_ready_next
[SQ_TEX
].empty() ||
398 !bu_ready_next
[SQ_VTX
].empty())) {
399 GCM_DUMP( sblog
<< "switching to fetch (regpressure)\n"; );
403 n
= bu_ready
[sq
].front();
405 // real count (e.g. SAMPLE_G will be expanded to 3 instructions,
406 // 2 SET_GRAD_ + 1 SAMPLE_G
408 if (n
->is_fetch_inst() && n
->src
.size() == 12) {
412 bool sampler_indexing
= false;
413 if (n
->is_fetch_inst() &&
414 static_cast<fetch_node
*>(n
)->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
)
416 sampler_indexing
= true; // Give sampler indexed ops get their own clause
417 ncnt
= sh
.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
// Close the current fetch clause when it would overflow max_fetch, or
// when enough ALU work (>= 24 real slots) is ready to interleave.
420 if ((sq
== SQ_TEX
|| sq
== SQ_VTX
) &&
421 ((last_count
>= ctx
.max_fetch
/2 &&
422 check_alu_ready_count(24)) ||
423 last_count
+ ncnt
> ctx
.max_fetch
))
425 else if (sq
== SQ_CF
&& last_count
> 4 &&
426 check_alu_ready_count(24))
430 if (sq
== SQ_ALU
&& n
->consumes_lds_oq() &&
431 (bu_ready
[SQ_TEX
].size() || bu_ready
[SQ_VTX
].size() || bu_ready
[SQ_GDS
].size())) {
432 GCM_DUMP( sblog
<< "switching scheduling due to lds op\n"; );
435 bu_ready
[sq
].pop_front();
438 if (!clause
|| sampler_indexing
) {
442 nst
= NST_ALU_CLAUSE
;
445 nst
= NST_TEX_CLAUSE
;
448 nst
= NST_GDS_CLAUSE
;
451 nst
= NST_VTX_CLAUSE
;
454 clause
= sh
.create_clause(nst
);
455 bb
->push_front(clause
);
461 bu_schedule(clause
, n
);
471 sblog
<< "bu finished scheduling BB_" << bb
->id
<< "\n";
// [fragment] gcm::bu_release_defs — reverse-iterates a value vector,
// skipping null/readonly values; releases a non-readonly relative-addressing
// value (v->rel), recurses into v->muse, and drops v from the `live` set
// (live_count bookkeeping is presumably in elided lines — TODO confirm).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
475 void gcm::bu_release_defs(vvec
& v
, bool src
) {
476 for (vvec::reverse_iterator I
= v
.rbegin(), E
= v
.rend(); I
!= E
; ++I
) {
478 if (!v
|| v
->is_readonly())
482 if (!v
->rel
->is_readonly())
483 bu_release_val(v
->rel
);
484 bu_release_defs(v
->muse
, true);
488 if (live
.remove_val(v
)) {
// [fragment] gcm::push_uc_stack — opens a new use-count stack level: grows
// nuc_stk if ucs_level is at its current size, otherwise clears the map at
// the (reused) level. The ucs_level increment itself is presumably in an
// elided line — TODO confirm against full source.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
495 void gcm::push_uc_stack() {
497 sblog
<< "pushing use count stack prev_level " << ucs_level
498 << " new level " << (ucs_level
+ 1) << "\n";
501 if (ucs_level
== nuc_stk
.size()) {
502 nuc_stk
.resize(ucs_level
+ 1);
505 nuc_stk
[ucs_level
].clear();
509 bool gcm::bu_is_ready(node
* n
) {
510 nuc_map
&cm
= nuc_stk
[ucs_level
];
511 nuc_map::iterator F
= cm
.find(n
);
512 unsigned uc
= (F
== cm
.end() ? 0 : F
->second
);
513 return uc
== uses
[n
];
// [fragment] gcm::bu_schedule — emits node n into clause c (the actual
// insertion is in elided lines): asserts n's chosen bottom_bb is the block
// currently being scheduled (bu_bb), updates the outstanding LDS output
// queue balance (producer closes one, consumer opens one), then releases
// the defs reachable through n's sources and destinations.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
516 void gcm::bu_schedule(container_node
* c
, node
* n
) {
518 sblog
<< "bu scheduling : ";
523 assert(op_map
[n
].bottom_bb
== bu_bb
);
525 if (n
->produces_lds_oq())
526 outstanding_lds_oq
--;
527 if (n
->consumes_lds_oq())
528 outstanding_lds_oq
++;
529 bu_release_defs(n
->src
, true);
530 bu_release_defs(n
->dst
, false);
// [fragment] gcm::dump_uc_stack — debug helper: prints every level of the
// use-count stack (nuc_stk[0..ucs_level]) with each node's current count.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
535 void gcm::dump_uc_stack() {
536 sblog
<< "##### uc_stk start ####\n";
537 for (unsigned l
= 0; l
<= ucs_level
; ++l
) {
538 nuc_map
&m
= nuc_stk
[l
];
540 sblog
<< "nuc_stk[" << l
<< "] : @" << &m
<< "\n";
542 for (nuc_map::iterator I
= m
.begin(), E
= m
.end(); I
!= E
; ++I
) {
543 sblog
<< " uc " << I
->second
<< " for ";
544 dump::dump_op(I
->first
);
548 sblog
<< "##### uc_stk end ####\n";
// [fragment] gcm::pop_uc_stack — merges the popped use-count level into the
// one below it (cm[n] += I->second); ops still in `pending` whose merged
// count reaches uses[n] become schedulable and are queued on pending_nodes.
// NOTE(review): `pm` and `cm` both index nuc_stk[ucs_level] in this
// fragment; an elided line between the two bindings presumably decrements
// ucs_level so they reference different levels — confirm against full
// source before relying on this reading.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
551 void gcm::pop_uc_stack() {
552 nuc_map
&pm
= nuc_stk
[ucs_level
];
554 nuc_map
&cm
= nuc_stk
[ucs_level
];
557 sblog
<< "merging use stack from level " << (ucs_level
+1)
558 << " to " << ucs_level
<< "\n";
561 for (nuc_map::iterator N
, I
= pm
.begin(), E
= pm
.end(); I
!= E
; ++I
) {
565 sblog
<< " " << cm
[n
] << " += " << I
->second
<< " for ";
570 unsigned uc
= cm
[n
] += I
->second
;
// All uses now accounted for — the pending op becomes schedulable.
572 if (n
->parent
== &pending
&& uc
== uses
[n
]) {
574 pending_nodes
.push_back(n
);
576 sblog
<< "pushed pending_node due to stack pop ";
// [fragment] gcm::bu_find_best_bb — chooses the bottom placement block for
// node n between its legal bounds [oi.top_bb, bu_bb]: generated copies
// (NF_DONT_HOIST) stay in bu_bb; otherwise the walk from bu_bb toward
// top_bb prefers enclosing blocks with a lower loop_level (hoisting out of
// loops). Result is stored in oi.bottom_bb.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
584 void gcm::bu_find_best_bb(node
*n
, op_info
&oi
) {
587 sblog
<< " find best bb : ";
595 // don't hoist generated copies
596 if (n
->flags
& NF_DONT_HOIST
) {
597 oi
.bottom_bb
= bu_bb
;
601 bb_node
* best_bb
= bu_bb
;
602 bb_node
* top_bb
= oi
.top_bb
;
603 assert(oi
.top_bb
&& !oi
.bottom_bb
);
607 // FIXME top_bb may be located inside the loop so we'll never enter it
608 // in the loop below, and the instruction will be incorrectly placed at the
609 // beginning of the shader.
610 // For now just check if top_bb's loop_level is higher than of
611 // current bb and abort the search for better bb in such case,
612 // but this problem may require more complete (and more expensive) fix
613 if (top_bb
->loop_level
<= best_bb
->loop_level
) {
614 while (c
&& c
!= top_bb
) {
625 if (c
->subtype
== NST_BB
) {
626 bb_node
*bb
= static_cast<bb_node
*>(c
);
627 if (bb
->loop_level
< best_bb
->loop_level
)
633 oi
.bottom_bb
= best_bb
;
// [fragment] gcm::add_ready — routes a newly-ready node to the right queue
// for its scheduling class (sh.get_queue_id): NF_SCHEDULE_EARLY nodes go to
// bu_ready_early; ALU copy-movs jump the queue (push_front of bu_ready);
// PRED_SET ALU ops writing dst[2] flag a pending exec-mask update; all
// others go to bu_ready_next.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
636 void gcm::add_ready(node
*n
) {
637 sched_queue_id sq
= sh
.get_queue_id(n
);
638 if (n
->flags
& NF_SCHEDULE_EARLY
)
639 bu_ready_early
[sq
].push_back(n
);
640 else if (sq
== SQ_ALU
&& n
->is_copy_mov())
641 bu_ready
[sq
].push_front(n
);
642 else if (n
->is_alu_inst()) {
643 alu_node
*a
= static_cast<alu_node
*>(n
);
644 if (a
->bc
.op_ptr
->flags
& AF_PRED
&& a
->dst
[2]) {
645 // PRED_SET instruction that updates exec mask
646 pending_exec_mask_update
= true;
648 bu_ready_next
[sq
].push_back(n
);
650 bu_ready_next
[sq
].push_back(n
);
// [fragment] gcm::bu_release_op — removes node n from the current use-count
// level and from `pending`, computes its best placement block, then queues
// it: directly ready if its bottom_bb is the block being scheduled (bu_bb),
// otherwise onto ready_above to be claimed by an outer block.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
653 void gcm::bu_release_op(node
* n
) {
654 op_info
&oi
= op_map
[n
];
657 sblog
<< " bu release op ";
661 nuc_stk
[ucs_level
].erase(n
);
662 pending
.remove_node(n
);
664 bu_find_best_bb(n
, oi
);
666 if (oi
.bottom_bb
== bu_bb
) {
667 GCM_DUMP( sblog
<< " ready\n";);
670 GCM_DUMP( sblog
<< " ready_above\n";);
671 ready_above
.push_back(n
);
// [fragment] gcm::bu_release_phi_defs — for each phi in container p
// (reverse order), pushes the phi's op-th source value onto pending_defs,
// skipping null/readonly values.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
675 void gcm::bu_release_phi_defs(container_node
* p
, unsigned op
)
677 for (node_riterator I
= p
->rbegin(), E
= p
->rend(); I
!= E
; ++I
) {
679 value
*v
= o
->src
[op
];
680 if (v
&& !v
->is_readonly())
681 pending_defs
.push_back(o
->src
[op
]);
// [fragment] gcm::get_uc_vec — accumulates a use count over a value vector;
// the visible tail also recurses into v->mdef. Per-value counting is in
// elided lines — TODO confirm details against full source.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
686 unsigned gcm::get_uc_vec(vvec
&vv
) {
688 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
694 c
+= get_uc_vec(v
->mdef
);
// [fragment] gcm::init_use_count — walks container s computing each node's
// use count from its destinations (get_uc_vec(n->dst)); nodes whose count
// qualifies (presumably zero uses — elided condition, TODO confirm) are
// pushed onto pending_nodes for immediate scheduling.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
701 void gcm::init_use_count(nuc_map
& m
, container_node
&s
) {
703 for (node_iterator I
= s
.begin(), E
= s
.end(); I
!= E
; ++I
) {
705 unsigned uc
= get_uc_vec(n
->dst
);
707 sblog
<< "uc " << uc
<< " ";
712 pending_nodes
.push_back(n
);
714 sblog
<< "pushed pending_node in init ";
// [fragment] gcm::bu_release_val — when value v's defining node is still in
// `pending`, bumps its released-use counter in the current use-count level
// (uc = ++m[n]) and reads the total uses[n]; the compare/release step is in
// elided lines. Also adds v to the `live` set, with live_count logged.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
724 void gcm::bu_release_val(value
* v
) {
725 node
*n
= v
->any_def();
727 if (n
&& n
->parent
== &pending
) {
728 nuc_map
&m
= nuc_stk
[ucs_level
];
729 unsigned uc
= ++m
[n
];
730 unsigned uc2
= uses
[n
];
732 if (live
.add_val(v
)) {
734 GCM_DUMP ( sblog
<< "live_count: " << live_count
<< "\n"; );
738 sblog
<< "release val ";
740 sblog
<< " for node ";
742 sblog
<< " new uc=" << uc
<< ", total " << uc2
<< "\n";
// [fragment] gcm::init_def_count — walks container s computing each node's
// unscheduled-input count from its sources and destinations
// (get_dc_vec(n->src, true) + get_dc_vec(n->dst, false)); storage into the
// map and ready-queue handling are in elided lines.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
751 void gcm::init_def_count(nuc_map
& m
, container_node
& s
) {
753 for (node_iterator I
= s
.begin(), E
= s
.end(); I
!= E
; ++I
) {
755 unsigned dc
= get_dc_vec(n
->src
, true) + get_dc_vec(n
->dst
, false);
759 sblog
<< "dc " << dc
<< " ";
// [fragment] gcm::get_dc_vec — counts defining nodes reachable through a
// value vector: skips null/readonly values, counts a relative-addressing
// def (v->rel->def), recurses into v->muse, and counts an array def
// (v->adef). Some branches are elided.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
766 unsigned gcm::get_dc_vec(vvec
& vv
, bool src
) {
768 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
770 if (!v
|| v
->is_readonly())
774 c
+= v
->rel
->def
!= NULL
;
775 c
+= get_dc_vec(v
->muse
, true);
779 c
+= v
->adef
!= NULL
;
// [fragment] gcm::real_alu_count — counts "real" ALU slots in queue q, up
// to max: plain ALU instructions count unless they are copy-movs of a GPR
// (elided branch presumably skips those — TODO confirm); packed ALU nodes
// count their contained instruction count.
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
785 unsigned gcm::real_alu_count(sched_queue
& q
, unsigned max
) {
786 sq_iterator
I(q
.begin()), E(q
.end());
789 while (I
!= E
&& c
< max
) {
791 if (n
->is_alu_inst()) {
792 if (!n
->is_copy_mov() || !n
->src
[0]->is_any_gpr())
794 } else if (n
->is_alu_packed()) {
795 c
+= static_cast<container_node
*>(n
)->count();
// [fragment] gcm::check_alu_ready_count — returns whether at least
// `threshold` real ALU slots are available across bu_ready[SQ_ALU] and
// bu_ready_next[SQ_ALU] (counting is capped at threshold; an elided early
// return presumably short-circuits when the first queue alone suffices —
// TODO confirm).
// NOTE(review): garbled extraction — wrapped statements, elided interior
// lines, leading integers are fused original line numbers. Tokens untouched.
803 bool gcm::check_alu_ready_count(unsigned threshold
) {
804 unsigned r
= real_alu_count(bu_ready
[SQ_ALU
], threshold
);
807 r
+= real_alu_count(bu_ready_next
[SQ_ALU
], threshold
- r
);
808 return r
>= threshold
;
811 } // namespace r600_sb