2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
32 #include "util/macros.h"
33 #include "util/u_math.h"
36 /* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
44 unsigned node_count
, unsigned class_count
)
46 struct lcra_state
*l
= calloc(1, sizeof(*l
));
48 l
->node_count
= node_count
;
49 l
->class_count
= class_count
;
51 l
->alignment
= calloc(sizeof(l
->alignment
[0]), node_count
);
52 l
->linear
= calloc(sizeof(l
->linear
[0]), node_count
* node_count
);
53 l
->modulus
= calloc(sizeof(l
->modulus
[0]), node_count
);
54 l
->class = calloc(sizeof(l
->class[0]), node_count
);
55 l
->class_start
= calloc(sizeof(l
->class_start
[0]), class_count
);
56 l
->class_disjoint
= calloc(sizeof(l
->class_disjoint
[0]), class_count
* class_count
);
57 l
->class_size
= calloc(sizeof(l
->class_size
[0]), class_count
);
58 l
->spill_cost
= calloc(sizeof(l
->spill_cost
[0]), node_count
);
59 l
->solutions
= calloc(sizeof(l
->solutions
[0]), node_count
);
61 memset(l
->solutions
, ~0, sizeof(l
->solutions
[0]) * node_count
);
67 lcra_free(struct lcra_state
*l
)
77 free(l
->class_disjoint
);
86 lcra_set_alignment(struct lcra_state
*l
, unsigned node
, unsigned align_log2
, unsigned bound
)
88 l
->alignment
[node
] = (align_log2
+ 1) | (bound
<< 16);
92 lcra_set_disjoint_class(struct lcra_state
*l
, unsigned c1
, unsigned c2
)
94 l
->class_disjoint
[(c1
* l
->class_count
) + c2
] = true;
95 l
->class_disjoint
[(c2
* l
->class_count
) + c1
] = true;
99 lcra_restrict_range(struct lcra_state
*l
, unsigned node
, unsigned len
)
101 if (node
< l
->node_count
&& l
->alignment
[node
]) {
102 unsigned BA
= l
->alignment
[node
];
103 unsigned alignment
= (BA
& 0xffff) - 1;
104 unsigned bound
= BA
>> 16;
105 l
->modulus
[node
] = DIV_ROUND_UP(bound
- len
+ 1, 1 << alignment
);
110 lcra_add_node_interference(struct lcra_state
*l
, unsigned i
, unsigned cmask_i
, unsigned j
, unsigned cmask_j
)
115 if (l
->class_disjoint
[(l
->class[i
] * l
->class_count
) + l
->class[j
]])
118 uint32_t constraint_fw
= 0;
119 uint32_t constraint_bw
= 0;
121 for (unsigned D
= 0; D
< 16; ++D
) {
122 if (cmask_i
& (cmask_j
<< D
)) {
123 constraint_bw
|= (1 << (15 + D
));
124 constraint_fw
|= (1 << (15 - D
));
127 if (cmask_i
& (cmask_j
>> D
)) {
128 constraint_fw
|= (1 << (15 + D
));
129 constraint_bw
|= (1 << (15 - D
));
133 l
->linear
[j
* l
->node_count
+ i
] |= constraint_fw
;
134 l
->linear
[i
* l
->node_count
+ j
] |= constraint_bw
;
138 lcra_test_linear(struct lcra_state
*l
, unsigned *solutions
, unsigned i
)
140 unsigned *row
= &l
->linear
[i
* l
->node_count
];
141 signed constant
= solutions
[i
];
143 for (unsigned j
= 0; j
< l
->node_count
; ++j
) {
144 if (solutions
[j
] == ~0) continue;
146 signed lhs
= solutions
[j
] - constant
;
148 if (lhs
< -15 || lhs
> 15)
151 if (row
[j
] & (1 << (lhs
+ 15)))
159 lcra_solve(struct lcra_state
*l
)
161 for (unsigned step
= 0; step
< l
->node_count
; ++step
) {
162 if (l
->solutions
[step
] != ~0) continue;
163 if (l
->alignment
[step
] == 0) continue;
165 unsigned _class
= l
->class[step
];
166 unsigned class_start
= l
->class_start
[_class
];
168 unsigned BA
= l
->alignment
[step
];
169 unsigned shift
= (BA
& 0xffff) - 1;
170 unsigned bound
= BA
>> 16;
172 unsigned P
= bound
>> shift
;
173 unsigned Q
= l
->modulus
[step
];
174 unsigned r_max
= l
->class_size
[_class
];
175 unsigned k_max
= r_max
>> shift
;
176 unsigned m_max
= k_max
/ P
;
179 for (unsigned m
= 0; m
< m_max
; ++m
) {
180 for (unsigned n
= 0; n
< Q
; ++n
) {
181 l
->solutions
[step
] = ((m
* P
+ n
) << shift
) + class_start
;
182 succ
= lcra_test_linear(l
, l
->solutions
, step
);
190 /* Out of registers - prepare to spill */
192 l
->spill_class
= l
->class[step
];
200 /* Register spilling is implemented with a cost-benefit system. Costs are set
201 * by the user. Benefits are calculated from the constraints. */
204 lcra_set_node_spill_cost(struct lcra_state
*l
, unsigned node
, signed cost
)
206 if (node
< l
->node_count
)
207 l
->spill_cost
[node
] = cost
;
210 /* Count along the lower triangle */
213 lcra_count_constraints(struct lcra_state
*l
, unsigned i
)
216 unsigned *constraints
= &l
->linear
[i
* l
->node_count
];
218 for (unsigned j
= 0; j
< i
; ++j
)
219 count
+= util_bitcount(constraints
[j
]);
225 lcra_get_best_spill_node(struct lcra_state
*l
)
227 float best_benefit
= -1.0;
228 signed best_node
= -1;
230 for (unsigned i
= 0; i
< l
->node_count
; ++i
) {
231 /* Find spillable nodes */
232 if (l
->class[i
] != l
->spill_class
) continue;
233 if (l
->spill_cost
[i
] < 0) continue;
235 /* Adapted from Chaitin's heuristic */
236 float constraints
= lcra_count_constraints(l
, i
);
237 float cost
= (l
->spill_cost
[i
] + 1);
238 float benefit
= constraints
/ cost
;
240 if (benefit
> best_benefit
) {
241 best_benefit
= benefit
;