- for (i = g->count - 1; i >= 0; i--) {
- if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
- continue;
-
- if (pq_test(g, i)) {
- decrement_q(g, i);
- g->stack[g->stack_count] = i;
- g->stack_count++;
- g->nodes[i].in_stack = true;
- progress = true;
- } else {
- unsigned int new_q_total = g->nodes[i].q_total;
- if (new_q_total < lowest_q_total) {
- best_optimistic_node = i;
- lowest_q_total = new_q_total;
- }
- }
+ for (int i = BITSET_WORDS(g->count) - 1, high_bit = top_word_high_bit;
+ i >= 0; i--, high_bit = BITSET_WORDBITS - 1) {
+ BITSET_WORD mask = ~(BITSET_WORD)0 >> (31 - high_bit);
+
+ BITSET_WORD skip = g->tmp.in_stack[i] | g->tmp.reg_assigned[i];
+ if (skip == mask)
+ continue;
+
+ BITSET_WORD pq = g->tmp.pq_test[i] & ~skip;
+ if (pq) {
+ /* In this case, we have stuff we can immediately take off the
+ * stack. This also means that we're guaranteed to make progress
+ * and we don't need to bother updating lowest_q_total because we
+ * know we're going to loop again before attempting to do anything
+ * optimistic.
+ */
+ for (int j = high_bit; j >= 0; j--) {
+ if (pq & BITSET_BIT(j)) {
+ unsigned int n = i * BITSET_WORDBITS + j;
+ assert(n < g->count);
+ add_node_to_stack(g, n);
+ /* add_node_to_stack() may update pq_test for this word so
+ * we need to update our local copy.
+ */
+ pq = g->tmp.pq_test[i] & ~skip;
+ progress = true;
+ }
+ }
+ } else if (!progress) {
+ if (g->tmp.min_q_total[i] == UINT_MAX) {
+ /* The min_q_total and min_q_node are dirty because we added
+ * one of these nodes to the stack. It needs to be
+ * recalculated.
+ */
+ for (int j = high_bit; j >= 0; j--) {
+ if (skip & BITSET_BIT(j))
+ continue;
+
+ unsigned int n = i * BITSET_WORDBITS + j;
+ assert(n < g->count);
+ if (g->nodes[n].tmp.q_total < g->tmp.min_q_total[i]) {
+ g->tmp.min_q_total[i] = g->nodes[n].tmp.q_total;
+ g->tmp.min_q_node[i] = n;
+ }
+ }
+ }
+ if (g->tmp.min_q_total[i] < min_q_total) {
+ min_q_node = g->tmp.min_q_node[i];
+ min_q_total = g->tmp.min_q_total[i];
+ }
+ }