2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
32 #include "util/macros.h"
33 #include "util/u_math.h"
36 /* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
45 unsigned min_alignment
, unsigned max_alignment
,
46 unsigned bound
, unsigned class_count
)
48 struct lcra_state
*l
= calloc(1, sizeof(*l
));
50 l
->node_count
= node_count
;
51 l
->class_count
= class_count
;
54 l
->alignment
= calloc(sizeof(l
->alignment
[0]), node_count
);
55 l
->linear
= calloc(sizeof(l
->linear
[0]), node_count
* node_count
);
56 l
->modulus
= calloc(sizeof(l
->modulus
[0]), node_count
);
57 l
->class = calloc(sizeof(l
->class[0]), node_count
);
58 l
->class_start
= calloc(sizeof(l
->class_start
[0]), class_count
);
59 l
->class_disjoint
= calloc(sizeof(l
->class_disjoint
[0]), class_count
* class_count
);
60 l
->class_size
= calloc(sizeof(l
->class_size
[0]), class_count
);
61 l
->spill_cost
= calloc(sizeof(l
->spill_cost
[0]), node_count
);
62 l
->solutions
= calloc(sizeof(l
->solutions
[0]), node_count
);
64 memset(l
->solutions
, ~0, sizeof(l
->solutions
[0]) * node_count
);
70 lcra_free(struct lcra_state
*l
)
80 free(l
->class_disjoint
);
89 lcra_set_alignment(struct lcra_state
*l
, unsigned node
, unsigned align_log2
)
91 l
->alignment
[node
] = align_log2
+ 1;
95 lcra_set_disjoint_class(struct lcra_state
*l
, unsigned c1
, unsigned c2
)
97 l
->class_disjoint
[(c1
* l
->class_count
) + c2
] = true;
98 l
->class_disjoint
[(c2
* l
->class_count
) + c1
] = true;
102 lcra_restrict_range(struct lcra_state
*l
, unsigned node
, unsigned len
)
104 if (node
< l
->node_count
&& l
->alignment
[node
])
105 l
->modulus
[node
] = DIV_ROUND_UP(l
->bound
- len
+ 1, 1 << (l
->alignment
[node
] - 1));
109 lcra_add_node_interference(struct lcra_state
*l
, unsigned i
, unsigned cmask_i
, unsigned j
, unsigned cmask_j
)
114 if (l
->class_disjoint
[(l
->class[i
] * l
->class_count
) + l
->class[j
]])
117 uint32_t constraint_fw
= 0;
118 uint32_t constraint_bw
= 0;
120 for (unsigned D
= 0; D
< 16; ++D
) {
121 if (cmask_i
& (cmask_j
<< D
)) {
122 constraint_bw
|= (1 << (15 + D
));
123 constraint_fw
|= (1 << (15 - D
));
126 if (cmask_i
& (cmask_j
>> D
)) {
127 constraint_fw
|= (1 << (15 + D
));
128 constraint_bw
|= (1 << (15 - D
));
132 l
->linear
[j
* l
->node_count
+ i
] |= constraint_fw
;
133 l
->linear
[i
* l
->node_count
+ j
] |= constraint_bw
;
137 lcra_test_linear(struct lcra_state
*l
, unsigned *solutions
, unsigned i
)
139 unsigned *row
= &l
->linear
[i
* l
->node_count
];
140 signed constant
= solutions
[i
];
142 for (unsigned j
= 0; j
< l
->node_count
; ++j
) {
143 if (solutions
[j
] == ~0) continue;
145 signed lhs
= solutions
[j
] - constant
;
147 if (lhs
< -15 || lhs
> 15)
150 if (row
[j
] & (1 << (lhs
+ 15)))
158 lcra_solve(struct lcra_state
*l
)
160 for (unsigned step
= 0; step
< l
->node_count
; ++step
) {
161 if (l
->solutions
[step
] != ~0) continue;
162 if (l
->alignment
[step
] == 0) continue;
164 unsigned _class
= l
->class[step
];
165 unsigned class_start
= l
->class_start
[_class
];
167 unsigned shift
= l
->alignment
[step
] - 1;
169 unsigned P
= l
->bound
>> shift
;
170 unsigned Q
= l
->modulus
[step
];
171 unsigned r_max
= l
->class_size
[_class
];
172 unsigned k_max
= r_max
>> shift
;
173 unsigned m_max
= k_max
/ P
;
176 for (unsigned m
= 0; m
< m_max
; ++m
) {
177 for (unsigned n
= 0; n
< Q
; ++n
) {
178 l
->solutions
[step
] = ((m
* P
+ n
) << shift
) + class_start
;
179 succ
= lcra_test_linear(l
, l
->solutions
, step
);
187 /* Out of registers - prepare to spill */
189 l
->spill_class
= l
->class[step
];
197 /* Register spilling is implemented with a cost-benefit system. Costs are set
198 * by the user. Benefits are calculated from the constraints. */
201 lcra_set_node_spill_cost(struct lcra_state
*l
, unsigned node
, signed cost
)
203 if (node
< l
->node_count
)
204 l
->spill_cost
[node
] = cost
;
207 /* Count along the lower triangle */
210 lcra_count_constraints(struct lcra_state
*l
, unsigned i
)
213 unsigned *constraints
= &l
->linear
[i
* l
->node_count
];
215 for (unsigned j
= 0; j
< i
; ++j
)
216 count
+= util_bitcount(constraints
[j
]);
222 lcra_get_best_spill_node(struct lcra_state
*l
)
224 float best_benefit
= -1.0;
225 signed best_node
= -1;
227 for (unsigned i
= 0; i
< l
->node_count
; ++i
) {
228 /* Find spillable nodes */
229 if (l
->class[i
] != l
->spill_class
) continue;
230 if (l
->spill_cost
[i
] < 0) continue;
232 /* Adapted from Chaitin's heuristic */
233 float constraints
= lcra_count_constraints(l
, i
);
234 float cost
= (l
->spill_cost
[i
] + 1);
235 float benefit
= constraints
/ cost
;
237 if (benefit
> best_benefit
) {
238 best_benefit
= benefit
;