src/gallium/drivers/r600/sb/sb_gcm.cpp

   1 /*
   2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *      Vadim Girlin
  25  */
  26
  27 #define GCM_DEBUG 0
  28
  29 #if GCM_DEBUG
  30 #define GCM_DUMP(a) do { a } while(0);
  31 #else
  32 #define GCM_DUMP(a)
  33 #endif
  34
  35 #include <map>
  36
  37 #include "sb_bc.h"
  38 #include "sb_shader.h"
  39 #include "sb_pass.h"
  40 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE
  41
  42 namespace r600_sb {
  43
  44 int gcm::run() {
  45
  46         GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );
  47
  48         collect_instructions(sh.root, true);
  49
  50         init_def_count(uses, pending);
  51
  52         for (node_iterator N, I = pending.begin(), E = pending.end();
  53                         I != E; I = N) {
  54                 N = I;
  55                 ++N;
  56                 node *o = *I;
  57
  58                 GCM_DUMP(
  59                         sblog << "pending : ";
  60                         dump::dump_op(o);
  61                         sblog << "\n";
  62                 );
  63
  64                 if (td_is_ready(o)) {
  65
  66                         GCM_DUMP(
  67                                 sblog << "  ready: ";
  68                                 dump::dump_op(o);
  69                                 sblog << "\n";
  70                         );
  71                         pending.remove_node(o);
  72                         ready.push_back(o);
  73                 } else {
  74                 }
  75         }
  76
  77         sched_early(sh.root);
  78
  79         if (!pending.empty()) {
  80                 sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
  81                 dump::dump_op(pending.front());
  82         }
  83
  84         assert(pending.empty());
  85
  86         GCM_DUMP( sh.dump_ir(); );
  87
  88         GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );
  89
  90         collect_instructions(sh.root, false);
  91
  92         init_use_count(uses, pending);
  93
  94         sched_late(sh.root);
  95         if (!pending.empty()) {
  96                 sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
  97                 dump::dump_op(pending.front());
  98         }
  99
 100         assert(ucs_level == 0);
 101         assert(pending.empty());
 102
 103         return 0;
 104 }
 105
 106
 107 void gcm::collect_instructions(container_node *c, bool early_pass) {
 108         if (c->is_bb()) {
 109
 110                 if (early_pass) {
 111                         for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
 112                                 node *n = *I;
 113                                 if (n->flags & NF_DONT_MOVE) {
 114                                         op_info &o = op_map[n];
 115                                         o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
 116                                 }
 117                         }
 118                 }
 119
 120                 pending.append_from(c);
 121                 return;
 122         }
 123
 124         for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
 125                 if (I->is_container()) {
 126                         collect_instructions(static_cast<container_node*>(*I), early_pass);
 127                 }
 128         }
 129 }
 130
 131 void gcm::sched_early(container_node *n) {
 132
 133         region_node *r =
 134                         (n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;
 135
 136         if (r && r->loop_phi) {
 137                 sched_early(r->loop_phi);
 138         }
 139
 140         for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
 141                 if (I->type == NT_OP) {
 142                         node *op = *I;
 143                         if (op->subtype == NST_PHI) {
 144                                 td_release_uses(op->dst);
 145                         }
 146                 } else if (I->is_container()) {
 147                         if (I->subtype == NST_BB) {
 148                                 bb_node* bb = static_cast<bb_node*>(*I);
 149                                 td_sched_bb(bb);
 150                         } else {
 151                                 sched_early(static_cast<container_node*>(*I));
 152                         }
 153                 }
 154         }
 155
 156         if (r && r->phi) {
 157                 sched_early(r->phi);
 158         }
 159 }
 160
 161 void gcm::td_schedule(bb_node *bb, node *n) {
 162         GCM_DUMP(
 163                 sblog << "scheduling : ";
 164                 dump::dump_op(n);
 165                 sblog << "\n";
 166         );
 167         td_release_uses(n->dst);
 168
 169         bb->push_back(n);
 170
 171         op_map[n].top_bb = bb;
 172
 173 }
 174
 175 void gcm::td_sched_bb(bb_node* bb) {
 176         GCM_DUMP(
 177         sblog << "td scheduling BB_" << bb->id << "\n";
 178         );
 179
 180         while (!ready.empty()) {
 181                 for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
 182                                 I = N) {
 183                         N = I; ++N;
 184                         td_schedule(bb, *I);
 185                         ready.erase(I);
 186                 }
 187         }
 188 }
 189
 190 bool gcm::td_is_ready(node* n) {
 191         return uses[n] == 0;
 192 }
 193
 194 void gcm::td_release_val(value *v) {
 195
 196         GCM_DUMP(
 197                 sblog << "td checking uses: ";
 198                 dump::dump_val(v);
 199                 sblog << "\n";
 200         );
 201
 202         for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
 203                 node *op = *I;
 204                 if (op->parent != &pending) {
 205                         continue;
 206                 }
 207
 208                 GCM_DUMP(
 209                         sblog << "td    used in ";
 210                         dump::dump_op(op);
 211                         sblog << "\n";
 212                 );
 213
 214                 assert(uses[op] > 0);
 215                 if (--uses[op] == 0) {
 216                         GCM_DUMP(
 217                                 sblog << "td        released : ";
 218                                 dump::dump_op(op);
 219                                 sblog << "\n";
 220                         );
 221
 222                         pending.remove_node(op);
 223                         ready.push_back(op);
 224                 }
 225         }
 226
 227 }
 228
 229 void gcm::td_release_uses(vvec& v) {
 230         for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
 231                 value *v = *I;
 232                 if (!v)
 233                         continue;
 234
 235                 if (v->is_rel())
 236                         td_release_uses(v->mdef);
 237                 else
 238                         td_release_val(v);
 239         }
 240 }
 241
 242 void gcm::sched_late(container_node *n) {
 243
 244         bool stack_pushed = false;
 245
 246         if (n->is_depart()) {
 247                 depart_node *d = static_cast<depart_node*>(n);
 248                 push_uc_stack();
 249                 stack_pushed = true;
 250                 bu_release_phi_defs(d->target->phi, d->dep_id);
 251         } else if (n->is_repeat()) {
 252                 repeat_node *r = static_cast<repeat_node*>(n);
 253                 assert(r->target->loop_phi);
 254                 push_uc_stack();
 255                 stack_pushed = true;
 256                 bu_release_phi_defs(r->target->loop_phi, r->rep_id);
 257         }
 258
 259         for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
 260                 if (I->is_container()) {
 261                         if (I->subtype == NST_BB) {
 262                                 bb_node* bb = static_cast<bb_node*>(*I);
 263                                 bu_sched_bb(bb);
 264                         } else {
 265                                 sched_late(static_cast<container_node*>(*I));
 266                         }
 267                 }
 268         }
 269
 270         if (n->type == NT_IF) {
 271                 if_node *f = static_cast<if_node*>(n);
 272                 if (f->cond)
 273                         pending_defs.push_back(f->cond);
 274         } else if (n->type == NT_REGION) {
 275                 region_node *r = static_cast<region_node*>(n);
 276                 if (r->loop_phi)
 277                         bu_release_phi_defs(r->loop_phi, 0);
 278         }
 279
 280         if (stack_pushed)
 281                 pop_uc_stack();
 282
 283 }
 284
 285 void gcm::bu_sched_bb(bb_node* bb) {
 286         GCM_DUMP(
 287         sblog << "bu scheduling BB_" << bb->id << "\n";
 288         );
 289
 290         bu_bb = bb;
 291
 292         if (!pending_nodes.empty()) {
 293                 GCM_DUMP(
 294                                 sblog << "pending nodes:\n";
 295                 );
 296
 297                 // TODO consider sorting the exports by array_base,
 298                 // possibly it can improve performance
 299
 300                 for (node_list::iterator I = pending_nodes.begin(),
 301                                 E = pending_nodes.end(); I != E; ++I) {
 302                         bu_release_op(*I);
 303                 }
 304                 pending_nodes.clear();
 305                 GCM_DUMP(
 306                         sblog << "pending nodes processed...\n";
 307                 );
 308         }
 309
 310
 311         if (!pending_defs.empty()) {
 312                 for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
 313                                 I != E; ++I) {
 314                         bu_release_val(*I);
 315                 }
 316                 pending_defs.clear();
 317         }
 318
 319         for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
 320                         I != E; I = N) {
 321                 N = I;
 322                 ++N;
 323                 node *n = *I;
 324                 if (op_map[n].bottom_bb == bb) {
 325                         add_ready(*I);
 326                         ready_above.erase(I);
 327                 }
 328         }
 329
 330         unsigned cnt_ready[SQ_NUM];
 331
 332         container_node *clause = NULL;
 333         unsigned last_inst_type = ~0;
 334         unsigned last_count = 0;
 335
 336         bool s = true;
 337         while (s) {
 338                 node *n;
 339
 340                 s = false;
 341
 342                 unsigned ready_mask = 0;
 343
 344                 for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
 345                         if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
 346                                 ready_mask |= (1 << sq);
 347                 }
 348
 349                 if (!ready_mask) {
 350                         for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
 351                                 if (!bu_ready_early[sq].empty()) {
 352                                         node *n = bu_ready_early[sq].front();
 353                                         bu_ready_early[sq].pop_front();
 354                                         bu_ready[sq].push_back(n);
 355                                         break;
 356                                 }
 357                         }
 358                 }
 359
 360                 for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
 361
 362                         if (sq == SQ_CF && pending_exec_mask_update) {
 363                                 pending_exec_mask_update = false;
 364                                 sq = SQ_ALU;
 365                                 --sq;
 366                                 continue;
 367                         }
 368
 369                         if (!bu_ready_next[sq].empty())
 370                                 bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);
 371
 372                         cnt_ready[sq] = bu_ready[sq].size();
 373
 374                         if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
 375                                         cnt_ready[sq] < ctx.max_fetch/2 &&
 376                                         !bu_ready_next[SQ_ALU].empty()) {
 377                                 sq = SQ_ALU;
 378                                 --sq;
 379                                 continue;
 380                         }
 381
 382                         while (!bu_ready[sq].empty()) {
 383
 384                                 if (last_inst_type != sq) {
 385                                         clause = NULL;
 386                                         last_count = 0;
 387                                         last_inst_type = sq;
 388                                 }
 389
 390                                 // simple heuristic to limit register pressure,
 391                                 if (sq == SQ_ALU && live_count > rp_threshold &&
 392                                                 (!bu_ready[SQ_TEX].empty() ||
 393                                                  !bu_ready[SQ_VTX].empty() ||
 394                                                  !bu_ready_next[SQ_TEX].empty() ||
 395                                                  !bu_ready_next[SQ_VTX].empty())) {
 396                                         GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
 397                                         break;
 398                                 }
 399
 400                                 n = bu_ready[sq].front();
 401
 402                                 // real count (e.g. SAMPLE_G will be expanded to 3 instructions,
 403                                 // 2 SET_GRAD_ + 1 SAMPLE_G
 404                                 unsigned ncnt = 1;
 405                                 if (n->is_fetch_inst() && n->src.size() == 12) {
 406                                         ncnt = 3;
 407                                 }
 408
 409                                 bool sampler_indexing = false;
 410                                 if (n->is_fetch_inst() &&
 411                                         static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
 412                                 {
 413                                         sampler_indexing = true; // Give sampler indexed ops get their own clause
 414                                         ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
 415                                 }
 416
 417                                 if ((sq == SQ_TEX || sq == SQ_VTX) &&
 418                                                 ((last_count >= ctx.max_fetch/2 &&
 419                                                 check_alu_ready_count(24)) ||
 420                                                                 last_count + ncnt > ctx.max_fetch))
 421                                         break;
 422                                 else if (sq == SQ_CF && last_count > 4 &&
 423                                                 check_alu_ready_count(24))
 424                                         break;
 425
 426                                 bu_ready[sq].pop_front();
 427
 428                                 if (sq != SQ_CF) {
 429                                         if (!clause || sampler_indexing) {
 430                                                 clause = sh.create_clause(sq == SQ_ALU ?
 431                                                                 NST_ALU_CLAUSE :
 432                                                                         sq == SQ_TEX ? NST_TEX_CLAUSE :
 433                                                                                         NST_VTX_CLAUSE);
 434                                                 bb->push_front(clause);
 435                                         }
 436                                 } else {
 437                                         clause = bb;
 438                                 }
 439
 440                                 bu_schedule(clause, n);
 441                                 s = true;
 442                                 last_count += ncnt;
 443                         }
 444                 }
 445         }
 446
 447         bu_bb = NULL;
 448
 449         GCM_DUMP(
 450                 sblog << "bu finished scheduling BB_" << bb->id << "\n";
 451         );
 452 }
 453
 454 void gcm::bu_release_defs(vvec& v, bool src) {
 455         for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
 456                 value *v = *I;
 457                 if (!v || v->is_readonly())
 458                         continue;
 459
 460                 if (v->is_rel()) {
 461                         if (!v->rel->is_readonly())
 462                                 bu_release_val(v->rel);
 463                         bu_release_defs(v->muse, true);
 464                 } else if (src)
 465                         bu_release_val(v);
 466                 else {
 467                         if (live.remove_val(v)) {
 468                                 --live_count;
 469                         }
 470                 }
 471         }
 472 }
 473
 474 void gcm::push_uc_stack() {
 475         GCM_DUMP(
 476                 sblog << "pushing use count stack prev_level " << ucs_level
 477                         << "   new level " << (ucs_level + 1) << "\n";
 478         );
 479         ++ucs_level;
 480         if (ucs_level == nuc_stk.size()) {
 481                 nuc_stk.resize(ucs_level + 1);
 482         }
 483         else {
 484                 nuc_stk[ucs_level].clear();
 485         }
 486 }
 487
 488 bool gcm::bu_is_ready(node* n) {
 489         nuc_map &cm = nuc_stk[ucs_level];
 490         nuc_map::iterator F = cm.find(n);
 491         unsigned uc = (F == cm.end() ? 0 : F->second);
 492         return uc == uses[n];
 493 }
 494
 495 void gcm::bu_schedule(container_node* c, node* n) {
 496         GCM_DUMP(
 497                 sblog << "bu scheduling : ";
 498                 dump::dump_op(n);
 499                 sblog << "\n";
 500         );
 501
 502         assert(op_map[n].bottom_bb == bu_bb);
 503
 504         bu_release_defs(n->src, true);
 505         bu_release_defs(n->dst, false);
 506
 507         c->push_front(n);
 508 }
 509
 510 void gcm::dump_uc_stack() {
 511         sblog << "##### uc_stk start ####\n";
 512         for (unsigned l = 0; l <= ucs_level; ++l) {
 513                 nuc_map &m = nuc_stk[l];
 514
 515                 sblog << "nuc_stk[" << l << "] :   @" << &m << "\n";
 516
 517                 for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
 518                         sblog << "    uc " << I->second << " for ";
 519                         dump::dump_op(I->first);
 520                         sblog << "\n";
 521                 }
 522         }
 523         sblog << "##### uc_stk end ####\n";
 524 }
 525
 526 void gcm::pop_uc_stack() {
 527         nuc_map &pm = nuc_stk[ucs_level];
 528         --ucs_level;
 529         nuc_map &cm = nuc_stk[ucs_level];
 530
 531         GCM_DUMP(
 532                 sblog << "merging use stack from level " << (ucs_level+1)
 533                         << " to " << ucs_level << "\n";
 534         );
 535
 536         for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) {
 537                 node *n = I->first;
 538
 539                 GCM_DUMP(
 540                         sblog << "      " << cm[n] << " += " << I->second << "  for ";
 541                         dump::dump_op(n);
 542                         sblog << "\n";
 543                 );
 544
 545                 unsigned uc = cm[n] += I->second;
 546
 547                 if (n->parent == &pending && uc == uses[n]) {
 548                         cm.erase(n);
 549                         pending_nodes.push_back(n);
 550                         GCM_DUMP(
 551                                 sblog << "pushed pending_node due to stack pop ";
 552                                 dump::dump_op(n);
 553                                 sblog << "\n";
 554                         );
 555                 }
 556         }
 557 }
 558
 559 void gcm::bu_find_best_bb(node *n, op_info &oi) {
 560
 561         GCM_DUMP(
 562                 sblog << "  find best bb : ";
 563                 dump::dump_op(n);
 564                 sblog << "\n";
 565         );
 566
 567         if (oi.bottom_bb)
 568                 return;
 569
 570         // don't hoist generated copies
 571         if (n->flags & NF_DONT_HOIST) {
 572                 oi.bottom_bb = bu_bb;
 573                 return;
 574         }
 575
 576         bb_node* best_bb = bu_bb;
 577         bb_node* top_bb = oi.top_bb;
 578         assert(oi.top_bb && !oi.bottom_bb);
 579
 580         node *c = best_bb;
 581
 582         // FIXME top_bb may be located inside the loop so we'll never enter it
 583         // in the loop below, and the instruction will be incorrectly placed at the
 584         // beginning of the shader.
 585         // For now just check if top_bb's loop_level is higher than of
 586         // current bb and abort the search for better bb in such case,
 587         // but this problem may require more complete (and more expensive) fix
 588         if (top_bb->loop_level <= best_bb->loop_level) {
 589                 while (c && c != top_bb) {
 590
 591                         if (c->prev) {
 592                                 c = c->prev;
 593                         } else {
 594                                 c = c->parent;
 595                                 if (!c)
 596                                         break;
 597                                 continue;
 598                         }
 599
 600                         if (c->subtype == NST_BB) {
 601                                 bb_node *bb = static_cast<bb_node*>(c);
 602                                 if (bb->loop_level < best_bb->loop_level)
 603                                         best_bb = bb;
 604                         }
 605                 }
 606         }
 607
 608         oi.bottom_bb = best_bb;
 609 }
 610
 611 void gcm::add_ready(node *n) {
 612         sched_queue_id sq = sh.get_queue_id(n);
 613         if (n->flags & NF_SCHEDULE_EARLY)
 614                 bu_ready_early[sq].push_back(n);
 615         else if (sq == SQ_ALU && n->is_copy_mov())
 616                 bu_ready[sq].push_front(n);
 617         else if (n->is_alu_inst()) {
 618                 alu_node *a = static_cast<alu_node*>(n);
 619                 if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
 620                         // PRED_SET instruction that updates exec mask
 621                         pending_exec_mask_update = true;
 622                 }
 623                 bu_ready_next[sq].push_back(n);
 624         } else
 625                 bu_ready_next[sq].push_back(n);
 626 }
 627
 628 void gcm::bu_release_op(node * n) {
 629         op_info &oi = op_map[n];
 630
 631         GCM_DUMP(
 632         sblog << "  bu release op  ";
 633         dump::dump_op(n);
 634         );
 635
 636         nuc_stk[ucs_level].erase(n);
 637         pending.remove_node(n);
 638
 639         bu_find_best_bb(n, oi);
 640
 641         if (oi.bottom_bb == bu_bb) {
 642                 GCM_DUMP( sblog << "   ready\n";);
 643                 add_ready(n);
 644         } else {
 645                 GCM_DUMP( sblog << "   ready_above\n";);
 646                 ready_above.push_back(n);
 647         }
 648 }
 649
 650 void gcm::bu_release_phi_defs(container_node* p, unsigned op)
 651 {
 652         for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
 653                 node *o = *I;
 654                 value *v = o->src[op];
 655                 if (v && !v->is_readonly())
 656                         pending_defs.push_back(o->src[op]);
 657
 658         }
 659 }
 660
 661 unsigned gcm::get_uc_vec(vvec &vv) {
 662         unsigned c = 0;
 663         for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 664                 value *v = *I;
 665                 if (!v)
 666                         continue;
 667
 668                 if (v->is_rel())
 669                         c += get_uc_vec(v->mdef);
 670                 else
 671                         c += v->use_count();
 672         }
 673         return c;
 674 }
 675
 676 void gcm::init_use_count(nuc_map& m, container_node &s) {
 677         m.clear();
 678         for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
 679                 node *n = *I;
 680                 unsigned uc = get_uc_vec(n->dst);
 681                 GCM_DUMP(
 682                         sblog << "uc " << uc << "  ";
 683                         dump::dump_op(n);
 684                         sblog << "\n";
 685                 );
 686                 if (!uc) {
 687                         pending_nodes.push_back(n);
 688                         GCM_DUMP(
 689                                 sblog << "pushed pending_node in init ";
 690                                 dump::dump_op(n);
 691                                 sblog << "\n";
 692                         );
 693
 694                 } else
 695                         m[n] = uc;
 696         }
 697 }
 698
 699 void gcm::bu_release_val(value* v) {
 700         node *n = v->any_def();
 701
 702         if (n && n->parent == &pending) {
 703                 nuc_map &m = nuc_stk[ucs_level];
 704                 unsigned uc = ++m[n];
 705                 unsigned uc2 = uses[n];
 706
 707                 if (live.add_val(v)) {
 708                         ++live_count;
 709                         GCM_DUMP ( sblog << "live_count: " << live_count << "\n"; );
 710                 }
 711
 712                 GCM_DUMP(
 713                         sblog << "release val ";
 714                         dump::dump_val(v);
 715                         sblog << "  for node ";
 716                         dump::dump_op(n);
 717                         sblog << "    new uc=" << uc << ", total " << uc2 << "\n";
 718                 );
 719
 720                 if (uc == uc2)
 721                         bu_release_op(n);
 722         }
 723
 724 }
 725
 726 void gcm::init_def_count(nuc_map& m, container_node& s) {
 727         m.clear();
 728         for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
 729                 node *n = *I;
 730                 unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
 731                 m[n] = dc;
 732
 733                 GCM_DUMP(
 734                         sblog << "dc " << dc << "  ";
 735                         dump::dump_op(n);
 736                         sblog << "\n";
 737                 );
 738         }
 739 }
 740
 741 unsigned gcm::get_dc_vec(vvec& vv, bool src) {
 742         unsigned c = 0;
 743         for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 744                 value *v = *I;
 745                 if (!v || v->is_readonly())
 746                         continue;
 747
 748                 if (v->is_rel()) {
 749                         c += v->rel->def != NULL;
 750                         c += get_dc_vec(v->muse, true);
 751                 }
 752                 else if (src) {
 753                         c += v->def != NULL;
 754                         c += v->adef != NULL;
 755                 }
 756         }
 757         return c;
 758 }
 759
 760 unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
 761         sq_iterator I(q.begin()), E(q.end());
 762         unsigned c = 0;
 763
 764         while (I != E && c < max) {
 765                 node *n = *I;
 766                 if (n->is_alu_inst()) {
 767                         if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
 768                                 ++c;
 769                 } else if (n->is_alu_packed()) {
 770                         c += static_cast<container_node*>(n)->count();
 771                 }
 772                 ++I;
 773         }
 774
 775         return c;
 776 }
 777
 778 bool gcm::check_alu_ready_count(unsigned threshold) {
 779         unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
 780         if (r >= threshold)
 781                 return true;
 782         r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
 783         return r >= threshold;
 784 }
 785
 786 } // namespace r600_sb