2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
32 #include "util/macros.h"
33 #include "util/u_math.h"
36 /* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
45 unsigned min_alignment
, unsigned max_alignment
,
46 unsigned bound
, unsigned class_count
)
48 struct lcra_state
*l
= calloc(1, sizeof(*l
));
50 l
->node_count
= node_count
;
51 l
->class_count
= class_count
;
54 l
->alignment
= calloc(sizeof(l
->alignment
[0]), node_count
);
55 l
->linear
= calloc(sizeof(l
->linear
[0]), node_count
* node_count
);
56 l
->modulus
= calloc(sizeof(l
->modulus
[0]), node_count
);
57 l
->class = calloc(sizeof(l
->class[0]), node_count
);
58 l
->class_start
= calloc(sizeof(l
->class_start
[0]), class_count
);
59 l
->class_disjoint
= calloc(sizeof(l
->class_disjoint
[0]), class_count
* class_count
);
60 l
->class_size
= calloc(sizeof(l
->class_size
[0]), class_count
);
61 l
->spill_cost
= calloc(sizeof(l
->spill_cost
[0]), node_count
);
62 l
->solutions
= calloc(sizeof(l
->solutions
[0]), node_count
);
64 memset(l
->solutions
, ~0, sizeof(l
->solutions
[0]) * node_count
);
70 lcra_set_alignment(struct lcra_state
*l
, unsigned node
, unsigned align_log2
)
72 l
->alignment
[node
] = align_log2
+ 1;
76 lcra_set_disjoint_class(struct lcra_state
*l
, unsigned c1
, unsigned c2
)
78 l
->class_disjoint
[(c1
* l
->class_count
) + c2
] = true;
79 l
->class_disjoint
[(c2
* l
->class_count
) + c1
] = true;
83 lcra_restrict_range(struct lcra_state
*l
, unsigned node
, unsigned len
)
85 if (l
->alignment
[node
])
86 l
->modulus
[node
] = DIV_ROUND_UP(l
->bound
- len
+ 1, 1 << (l
->alignment
[node
] - 1));
90 lcra_add_node_interference(struct lcra_state
*l
, unsigned i
, unsigned cmask_i
, unsigned j
, unsigned cmask_j
)
95 if (l
->class_disjoint
[(l
->class[i
] * l
->class_count
) + l
->class[j
]])
98 uint32_t constraint_fw
= 0;
99 uint32_t constraint_bw
= 0;
101 for (unsigned D
= 0; D
< 16; ++D
) {
102 if (cmask_i
& (cmask_j
<< D
)) {
103 constraint_bw
|= (1 << (15 + D
));
104 constraint_fw
|= (1 << (15 - D
));
107 if (cmask_i
& (cmask_j
>> D
)) {
108 constraint_fw
|= (1 << (15 + D
));
109 constraint_bw
|= (1 << (15 - D
));
113 l
->linear
[j
* l
->node_count
+ i
] |= constraint_fw
;
114 l
->linear
[i
* l
->node_count
+ j
] |= constraint_bw
;
118 lcra_test_linear(struct lcra_state
*l
, unsigned *solutions
, unsigned i
)
120 unsigned *row
= &l
->linear
[i
* l
->node_count
];
121 signed constant
= solutions
[i
];
123 for (unsigned j
= 0; j
< l
->node_count
; ++j
) {
124 if (solutions
[j
] == ~0) continue;
126 signed lhs
= solutions
[j
] - constant
;
128 if (lhs
< -15 || lhs
> 15)
131 if (row
[j
] & (1 << (lhs
+ 15)))
139 lcra_solve(struct lcra_state
*l
)
141 for (unsigned step
= 0; step
< l
->node_count
; ++step
) {
142 if (l
->solutions
[step
] != ~0) continue;
143 if (l
->alignment
[step
] == 0) continue;
145 unsigned _class
= l
->class[step
];
146 unsigned class_start
= l
->class_start
[_class
];
148 unsigned shift
= l
->alignment
[step
] - 1;
150 unsigned P
= l
->bound
>> shift
;
151 unsigned Q
= l
->modulus
[step
];
152 unsigned r_max
= l
->class_size
[_class
];
153 unsigned k_max
= r_max
>> shift
;
154 unsigned m_max
= k_max
/ P
;
157 for (unsigned m
= 0; m
< m_max
; ++m
) {
158 for (unsigned n
= 0; n
< Q
; ++n
) {
159 l
->solutions
[step
] = ((m
* P
+ n
) << shift
) + class_start
;
160 succ
= lcra_test_linear(l
, l
->solutions
, step
);
168 /* Out of registers - prepare to spill */
170 l
->spill_class
= l
->class[step
];
178 /* Register spilling is implemented with a cost-benefit system. Costs are set
179 * by the user. Benefits are calculated from the constraints. */
182 lcra_set_node_spill_cost(struct lcra_state
*l
, unsigned node
, signed cost
)
184 l
->spill_cost
[node
] = cost
;
187 /* Count along the lower triangle */
190 lcra_count_constraints(struct lcra_state
*l
, unsigned i
)
193 unsigned *constraints
= &l
->linear
[i
* l
->node_count
];
195 for (unsigned j
= 0; j
< i
; ++j
)
196 count
+= util_bitcount(constraints
[j
]);
202 lcra_get_best_spill_node(struct lcra_state
*l
)
204 float best_benefit
= -1.0;
205 signed best_node
= -1;
207 for (unsigned i
= 0; i
< l
->node_count
; ++i
) {
208 /* Find spillable nodes */
209 if (l
->class[i
] != l
->spill_class
) continue;
210 if (l
->spill_cost
[i
] < 0) continue;
212 /* Adapted from Chaitin's heuristic */
213 float constraints
= lcra_count_constraints(l
, i
);
214 float cost
= (l
->spill_cost
[i
] + 1);
215 float benefit
= constraints
/ cost
;
217 if (benefit
> best_benefit
) {
218 best_benefit
= benefit
;