r600g/sb: fix allocation of indirectly addressed input arrays
[mesa.git] / src / gallium / drivers / r600 / sb / sb_ra_coalesce.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define RA_DEBUG 0
28
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
40 using std::cerr;
41
// Pass entry point: all actual coalescing work is delegated to the
// shader's coalescer instance (sh.coal).
int ra_coalesce::run() {
	return sh.coal.run();
}
45
// Record an affinity edge between two values - the allocator will try to
// place both in the same register. The edge is consumed later by
// build_chunks(); higher cost means stronger affinity.
void coalescer::add_edge(value* a, value* b, unsigned cost) {
	// Coalescing only applies to values living in scalar GPRs.
	assert(a->is_sgpr() && b->is_sgpr());
	edges.insert(new ra_edge(a,b, cost));
}
50
51 void coalescer::create_chunk(value *v) {
52
53 assert(v->is_sgpr());
54
55 ra_chunk *c = new ra_chunk();
56
57 c->values.push_back(v);
58
59 if (v->is_chan_pinned())
60 c->flags |= RCF_PIN_CHAN;
61 if (v->is_reg_pinned()) {
62 c->flags |= RCF_PIN_REG;
63 }
64
65 c->pin = v->pin_gpr;
66
67 RA_DUMP(
68 cerr << "create_chunk: ";
69 dump_chunk(c);
70 );
71
72 all_chunks.push_back(c);
73 v->chunk = c;
74
75 }
76
77 void coalescer::unify_chunks(ra_edge *e) {
78 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
79
80 RA_DUMP(
81 cerr << "unify_chunks: ";
82 dump_chunk(c1);
83 dump_chunk(c2);
84 );
85
86 if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
87 c1->flags |= RCF_PIN_CHAN;
88 c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
89 }
90
91 if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
92 c1->flags |= RCF_PIN_REG;
93 c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
94 }
95
96 c1->values.reserve(c1->values.size() + c2->values.size());
97
98 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
99 ++I) {
100 (*I)->chunk = c1;
101 c1->values.push_back(*I);
102 }
103
104 chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
105 assert(F != all_chunks.end());
106
107 all_chunks.erase(F);
108
109 c1->cost += c2->cost + e->cost;
110 delete c2;
111 }
112
113 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
114 unsigned pin_flags = (c1->flags & c2->flags) &
115 (RCF_PIN_CHAN | RCF_PIN_REG);
116
117 if ((pin_flags & RCF_PIN_CHAN) &&
118 c1->pin.chan() != c2->pin.chan())
119 return true;
120
121 if ((pin_flags & RCF_PIN_REG) &&
122 c1->pin.sel() != c2->pin.sel())
123 return true;
124
125 for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
126 ++I) {
127 value *v1 = *I;
128
129 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
130 ++I) {
131 value *v2 = *I;
132
133 if (!v1->v_equal(v2) && v1->interferences.contains(v2))
134 return true;
135 }
136 }
137 return false;
138 }
139
140 void coalescer::build_chunks() {
141
142 for (edge_queue::iterator I = edges.begin(), E = edges.end();
143 I != E; ++I) {
144
145 ra_edge *e = *I;
146
147 if (!e->a->chunk)
148 create_chunk(e->a);
149
150 if (!e->b->chunk)
151 create_chunk(e->b);
152
153 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
154
155 if (c1 == c2) {
156 c1->cost += e->cost;
157 } else if (!chunks_interference(c1, c2))
158 unify_chunks(e);
159 }
160 }
161
// Allocate a new constraint of the given kind and register it in
// all_constraints (the coalescer owns it; freed in the destructor).
ra_constraint* coalescer::create_constraint(constraint_kind kind) {
	ra_constraint *c = new ra_constraint(kind);
	all_constraints.push_back(c);
	return c;
}
167
168 void coalescer::dump_edges() {
169 cerr << "######## affinity edges\n";
170
171 for (edge_queue::iterator I = edges.begin(), E = edges.end();
172 I != E; ++I) {
173 ra_edge* e = *I;
174 cerr << " ra_edge ";
175 dump::dump_val(e->a);
176 cerr << " <-> ";
177 dump::dump_val(e->b);
178 cerr << " cost = " << e->cost << "\n";
179 }
180 }
181
182 void coalescer::dump_chunks() {
183 cerr << "######## chunks\n";
184
185 for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
186 I != E; ++I) {
187 ra_chunk* c = *I;
188 dump_chunk(c);
189 }
190 }
191
192
193 void coalescer::dump_constraint_queue() {
194 cerr << "######## constraints\n";
195
196 for (constraint_queue::iterator I = constraints.begin(),
197 E = constraints.end(); I != E; ++I) {
198 ra_constraint* c = *I;
199 dump_constraint(c);
200 }
201 }
202
// Debug helper: print one chunk - its cost, member values, and any
// pin/global flags that are set.
void coalescer::dump_chunk(ra_chunk* c) {
	cerr << " ra_chunk cost = " << c->cost << " : ";
	dump::dump_vec(c->values);

	if (c->flags & RCF_PIN_REG)
		cerr << " REG = " << c->pin.sel();

	if (c->flags & RCF_PIN_CHAN)
		cerr << " CHAN = " << c->pin.chan();

	cerr << (c->flags & RCF_GLOBAL ? " GLOBAL" : "");

	cerr << "\n";
}
217
// Debug helper: print one constraint - its kind, cost, and values.
void coalescer::dump_constraint(ra_constraint* c) {
	cerr << " ra_constraint: ";
	switch (c->kind) {
	case CK_PACKED_BS: cerr << "PACKED_BS"; break;
	case CK_PHI: cerr << "PHI"; break;
	case CK_SAME_REG: cerr << "SAME_REG"; break;
	// Unknown kinds indicate a programming error upstream.
	default: cerr << "UNKNOWN_KIND"; assert(0); break;
	}

	cerr << " cost = " << c->cost << " : ";
	dump::dump_vec(c->values);

	cerr << "\n";
}
232
233 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
234
235 for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
236 ++I) {
237 value *v = *I;
238 s.add_set(v->interferences);
239 }
240 s.remove_vec(c->values);
241 }
242
243 void coalescer::build_chunk_queue() {
244 for (chunk_vec::iterator I = all_chunks.begin(),
245 E = all_chunks.end(); I != E; ++I) {
246 ra_chunk *c = *I;
247
248 if (!c->is_fixed())
249 chunks.insert(c);
250 }
251 }
252
253 void coalescer::build_constraint_queue() {
254 for (constraint_vec::iterator I = all_constraints.begin(),
255 E = all_constraints.end(); I != E; ++I) {
256 ra_constraint *c = *I;
257 unsigned cost = 0;
258
259 if (c->values.empty() || !c->values.front()->is_sgpr())
260 continue;
261
262 if (c->kind != CK_SAME_REG)
263 continue;
264
265 for (vvec::iterator I = c->values.begin(), E = c->values.end();
266 I != E; ++I) {
267 value *v = *I;
268 if (!v->chunk)
269 create_chunk(v);
270 else
271 cost += v->chunk->cost;
272 }
273 c->cost = cost;
274 constraints.insert(c);
275 }
276 }
277
// Assign a color (sel_chan-encoded register+channel) to every queued
// chunk. Reg-pinned chunks first try only their pinned register (pass 0),
// then fall back to scanning the whole non-temp gpr range (pass 1).
void coalescer::color_chunks() {

	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
			I != E; ++I) {
		ra_chunk *c = *I;
		// Skip chunks already fixed, and singleton chunks (presumably
		// colored later outside the coalescer - TODO confirm).
		if (c->is_fixed() || c->values.size() == 1)
			continue;

		sb_bitset rb;
		val_set interf;

		// Build the set of register slots this chunk must avoid.
		get_chunk_interferences(c, interf);

		RA_DUMP(
			cerr << "color_chunks: ";
			dump_chunk(c);
			cerr << "\n interferences: ";
			dump::dump_set(sh,interf);
			cerr << "\n";
		);

		init_reg_bitset(rb, interf);

		// pass 0 = pinned register only; unpinned chunks start at pass 1.
		unsigned pass = c->is_reg_pinned() ? 0 : 1;

		// Channel search range: just the pinned channel, or all four.
		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;

		// 0 doubles as the "no color found yet" sentinel - this relies
		// on sel_chan never encoding a valid (reg, chan) as 0
		// (NOTE(review): confirm against sel_chan in sb_ir.h).
		unsigned color = 0;

		while (pass < 2) {

			unsigned rs, re;

			if (pass == 0) {
				rs = c->pin.sel();
				re = rs + 1;
			} else {
				rs = 0;
				re = sh.num_nontemp_gpr();
			}

			// Take the first free (reg, chan) slot in the range; bits
			// beyond the bitset size were never marked as used.
			for (unsigned reg = rs; reg < re; ++reg) {
				for (unsigned chan = cs; chan < ce; ++chan) {
					unsigned bit = sel_chan(reg, chan);
					if (bit >= rb.size() || !rb.get(bit)) {
						color = bit;
						break;
					}
				}
				if (color)
					break;
			}

			if (color)
				break;

			++pass;
		}

		assert(color);
		color_chunk(c, color);
	}
}
342
343 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
344
345 for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
346 value *v = *I;
347
348 if (!v->is_any_gpr())
349 continue;
350
351 unsigned gpr = v->get_final_gpr();
352 if (!gpr)
353 continue;
354
355 if (gpr) {
356 if (gpr >= bs.size())
357 bs.resize(gpr + 64);
358 bs.set(gpr, 1);
359 }
360 }
361 }
362
// Assign 'color' to every value of chunk 'c'. Values whose own pinning
// conflicts with the chosen color are detached into fresh chunks instead
// of being recolored.
void coalescer::color_chunk(ra_chunk *c, sel_chan color) {

	// Iterate over a copy: detach_value() erases entries from c->values.
	vvec vv = c->values;

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
			++I) {
		value *v = *I;

		// Reg-pinned value that disagrees with the color: split it off.
		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
			detach_value(v);
			continue;
		}

		// Likewise for channel pinning.
		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
			detach_value(v);
			continue;
		}

		v->gpr = color;

		// PHI-constrained values are fixed as soon as they are colored
		// (presumably so later passes keep this assignment - confirm).
		if (v->constraint && v->constraint->kind == CK_PHI)
			v->fix();


		RA_DUMP(
			cerr << " assigned " << color << " to ";
			dump::dump_val(v);
			cerr << "\n";
		);
	}

	c->pin = color;

	if (c->is_reg_pinned()) {
		c->fix();
	}
}
400
401 coalescer::~coalescer() {
402
403 // FIXME use pool allocator ??
404
405 for (constraint_vec::iterator I = all_constraints.begin(),
406 E = all_constraints.end(); I != E; ++I) {
407 delete (*I);
408 }
409
410 for (chunk_vec::iterator I = all_chunks.begin(),
411 E = all_chunks.end(); I != E; ++I) {
412 delete (*I);
413 }
414
415 for (edge_queue::iterator I = edges.begin(), E = edges.end();
416 I != E; ++I) {
417 delete (*I);
418 }
419 }
420
421 int coalescer::run() {
422 int r;
423
424 RA_DUMP( dump_edges(); );
425
426 build_chunks();
427 RA_DUMP( dump_chunks(); );
428
429 build_constraint_queue();
430 RA_DUMP( dump_constraint_queue(); );
431
432 if ((r = color_constraints()))
433 return r;
434
435 build_chunk_queue();
436 color_chunks();
437
438 return 0;
439 }
440
// Intentionally a no-op: PHI-constrained values appear to be fixed as
// they are colored in color_chunk(), so there is nothing left to do here
// (NOTE(review): confirm the empty body is intentional).
void coalescer::color_phi_constraint(ra_constraint* c) {
}
443
// Remove value 'v' from its current chunk and place it into a fresh
// singleton chunk. Returns the new chunk.
ra_chunk* coalescer::detach_value(value *v) {

	vvec::iterator F = std::find(v->chunk->values.begin(),
			v->chunk->values.end(), v);

	assert(F != v->chunk->values.end());
	v->chunk->values.erase(F);
	// After this call v->chunk points to the newly created chunk, so the
	// statements below operate on the NEW chunk, not the old one.
	create_chunk(v);

	if (v->is_reg_pinned()) {
		v->chunk->fix();
	}

	RA_DUMP(
		cerr << " detached : ";
		dump_chunk(v->chunk);
	);

	return v->chunk;

}
465
// Color a CK_SAME_REG constraint: all constrained values must land in the
// same register (sel), each in a distinct channel. Searches channel
// permutations and register numbers for the lowest register that fits.
// Returns 0 on success, -1 if no register is free.
int coalescer::color_reg_constraint(ra_constraint *c) {
	unsigned k, cnt = c->values.size();
	vvec & cv = c->values;

	// Per-value working state. The fixed size 4 assumes at most one value
	// per channel (TODO confirm the caller guarantees cnt <= 4).
	ra_chunk *ch[4];                  // chunk of each constrained value
	unsigned swz[4] = {0, 1, 2, 3};   // candidate channel assignment
	val_set interf[4];                // interferences per chunk
	sb_bitset rb[4];                  // occupied slots per chunk

	bool reg_pinned = false;
	unsigned pin_reg = ~0;

	unsigned chan_mask = 0;           // channels claimed by chan-pinned chunks

	k = 0;
	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
		value *v = *I;

		if (!v->chunk)
			create_chunk(v);

		ch[k] = v->chunk;

		if (v->chunk->is_chan_pinned()) {
			unsigned chan = 1 << v->chunk->pin.chan();

			if (chan & chan_mask) { // channel already in use
				// Two values pinned to the same channel can't share a
				// register - detach this one into an unpinned chunk.
				ch[k] = detach_value(v);
				assert(!ch[k]->is_chan_pinned());
			} else {
				chan_mask |= chan;
			}
		}

		// The first reg-pinned chunk picks the register tried in pass 0.
		if (v->chunk->is_reg_pinned()) {
			if (!reg_pinned) {
				reg_pinned = true;
				pin_reg = v->chunk->pin.sel();
			}
		}

		get_chunk_interferences(ch[k], interf[k]);
		init_reg_bitset(rb[k], interf[k]);
	}

	unsigned start_reg, end_reg;

	start_reg = 0;
	end_reg = sh.num_nontemp_gpr();

	unsigned min_reg = end_reg;
	unsigned min_swz[4];
	// pass 0: pinned register only; pass 1: full register scan.
	unsigned i, pass = reg_pinned ? 0 : 1;

	bool done = false;

	while (pass < 2) {

		unsigned rs, re;

		if (pass == 0) {
			re = pin_reg + 1;
			rs = pin_reg;
		} else {
			re = end_reg;
			rs = start_reg;
		}

		min_reg = re;

		// cycle on swizzle combinations
		do {
			// Reject permutations that contradict channel pinning.
			for (i = 0; i < cnt; ++i) {
				if (ch[i]->flags & RCF_PIN_CHAN)
					if (ch[i]->pin.chan() != swz[i])
						break;
			}
			if (i != cnt)
				continue;

			// looking for minimal reg number such that the constrained chunks
			// may be colored with the current swizzle combination
			for (unsigned reg = rs; reg < min_reg; ++reg) {
				for (i = 0; i < cnt; ++i) {
					unsigned bit = sel_chan(reg, swz[i]);
					// Slot occupied by an interfering value?
					if (bit < rb[i].size() && rb[i].get(bit))
						break;
				}
				if (i == cnt) {
					// All values fit: remember this (reg, swizzle) pair.
					done = true;
					min_reg = reg;
					std::copy(swz, swz + 4, min_swz);
					break;
				}
			}

			// In pass 0 the register is fixed, so the first fit wins.
			if (pass == 0 && done)
				break;

		} while (std::next_permutation(swz, swz + 4));

		// Pass 1 exhausted without success: out of registers.
		if (!done && pass) {
			cerr << "sb: ra_coalesce - out of registers\n";
			return -1;
		}

		if (pass == 0 && done)
			break;

		++pass;
	};

	assert(done);

	RA_DUMP(
		cerr << "min reg = " << min_reg << " min_swz = "
				<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
	);

	// Apply the chosen coloring to every chunk.
	for (i = 0; i < cnt; ++i) {
		sel_chan color(min_reg, min_swz[i]);
		ra_chunk *cc = ch[i];

		if (cc->is_fixed()) {
			// Fixed chunk with a different color: split the constrained
			// value out rather than recoloring the whole chunk.
			if (cc->pin != color)
				cc = detach_value(cv[i]);
			else
				continue;
		}

		color_chunk(cc, color);
		cc->fix();
	}

	return 0;
}
602
603 int coalescer::color_constraints() {
604 int r;
605
606 for (constraint_queue::iterator I = constraints.begin(),
607 E = constraints.end(); I != E; ++I) {
608
609 ra_constraint *c = *I;
610
611 RA_DUMP(
612 cerr << "color_constraints: ";
613 dump_constraint(c);
614 );
615
616 if (c->kind == CK_SAME_REG) {
617 if ((r = color_reg_constraint(c)))
618 return r;
619 } else if (c->kind == CK_PHI)
620 color_phi_constraint(c);
621 }
622 return 0;
623 }
624
625 } // namespace r600_sb