2 * Copyright (C) 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
32 #include "util/macros.h"
33 #include "util/u_math.h"
36 /* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
45 unsigned bound
, unsigned class_count
)
47 struct lcra_state
*l
= calloc(1, sizeof(*l
));
49 l
->node_count
= node_count
;
50 l
->class_count
= class_count
;
53 l
->alignment
= calloc(sizeof(l
->alignment
[0]), node_count
);
54 l
->linear
= calloc(sizeof(l
->linear
[0]), node_count
* node_count
);
55 l
->modulus
= calloc(sizeof(l
->modulus
[0]), node_count
);
56 l
->class = calloc(sizeof(l
->class[0]), node_count
);
57 l
->class_start
= calloc(sizeof(l
->class_start
[0]), class_count
);
58 l
->class_disjoint
= calloc(sizeof(l
->class_disjoint
[0]), class_count
* class_count
);
59 l
->class_size
= calloc(sizeof(l
->class_size
[0]), class_count
);
60 l
->spill_cost
= calloc(sizeof(l
->spill_cost
[0]), node_count
);
61 l
->solutions
= calloc(sizeof(l
->solutions
[0]), node_count
);
63 memset(l
->solutions
, ~0, sizeof(l
->solutions
[0]) * node_count
);
69 lcra_free(struct lcra_state
*l
)
79 free(l
->class_disjoint
);
88 lcra_set_alignment(struct lcra_state
*l
, unsigned node
, unsigned align_log2
)
90 l
->alignment
[node
] = align_log2
+ 1;
94 lcra_set_disjoint_class(struct lcra_state
*l
, unsigned c1
, unsigned c2
)
96 l
->class_disjoint
[(c1
* l
->class_count
) + c2
] = true;
97 l
->class_disjoint
[(c2
* l
->class_count
) + c1
] = true;
101 lcra_restrict_range(struct lcra_state
*l
, unsigned node
, unsigned len
)
103 if (node
< l
->node_count
&& l
->alignment
[node
])
104 l
->modulus
[node
] = DIV_ROUND_UP(l
->bound
- len
+ 1, 1 << (l
->alignment
[node
] - 1));
108 lcra_add_node_interference(struct lcra_state
*l
, unsigned i
, unsigned cmask_i
, unsigned j
, unsigned cmask_j
)
113 if (l
->class_disjoint
[(l
->class[i
] * l
->class_count
) + l
->class[j
]])
116 uint32_t constraint_fw
= 0;
117 uint32_t constraint_bw
= 0;
119 for (unsigned D
= 0; D
< 16; ++D
) {
120 if (cmask_i
& (cmask_j
<< D
)) {
121 constraint_bw
|= (1 << (15 + D
));
122 constraint_fw
|= (1 << (15 - D
));
125 if (cmask_i
& (cmask_j
>> D
)) {
126 constraint_fw
|= (1 << (15 + D
));
127 constraint_bw
|= (1 << (15 - D
));
131 l
->linear
[j
* l
->node_count
+ i
] |= constraint_fw
;
132 l
->linear
[i
* l
->node_count
+ j
] |= constraint_bw
;
136 lcra_test_linear(struct lcra_state
*l
, unsigned *solutions
, unsigned i
)
138 unsigned *row
= &l
->linear
[i
* l
->node_count
];
139 signed constant
= solutions
[i
];
141 for (unsigned j
= 0; j
< l
->node_count
; ++j
) {
142 if (solutions
[j
] == ~0) continue;
144 signed lhs
= solutions
[j
] - constant
;
146 if (lhs
< -15 || lhs
> 15)
149 if (row
[j
] & (1 << (lhs
+ 15)))
157 lcra_solve(struct lcra_state
*l
)
159 for (unsigned step
= 0; step
< l
->node_count
; ++step
) {
160 if (l
->solutions
[step
] != ~0) continue;
161 if (l
->alignment
[step
] == 0) continue;
163 unsigned _class
= l
->class[step
];
164 unsigned class_start
= l
->class_start
[_class
];
166 unsigned shift
= l
->alignment
[step
] - 1;
168 unsigned P
= l
->bound
>> shift
;
169 unsigned Q
= l
->modulus
[step
];
170 unsigned r_max
= l
->class_size
[_class
];
171 unsigned k_max
= r_max
>> shift
;
172 unsigned m_max
= k_max
/ P
;
175 for (unsigned m
= 0; m
< m_max
; ++m
) {
176 for (unsigned n
= 0; n
< Q
; ++n
) {
177 l
->solutions
[step
] = ((m
* P
+ n
) << shift
) + class_start
;
178 succ
= lcra_test_linear(l
, l
->solutions
, step
);
186 /* Out of registers - prepare to spill */
188 l
->spill_class
= l
->class[step
];
196 /* Register spilling is implemented with a cost-benefit system. Costs are set
197 * by the user. Benefits are calculated from the constraints. */
200 lcra_set_node_spill_cost(struct lcra_state
*l
, unsigned node
, signed cost
)
202 if (node
< l
->node_count
)
203 l
->spill_cost
[node
] = cost
;
206 /* Count along the lower triangle */
209 lcra_count_constraints(struct lcra_state
*l
, unsigned i
)
212 unsigned *constraints
= &l
->linear
[i
* l
->node_count
];
214 for (unsigned j
= 0; j
< i
; ++j
)
215 count
+= util_bitcount(constraints
[j
]);
221 lcra_get_best_spill_node(struct lcra_state
*l
)
223 float best_benefit
= -1.0;
224 signed best_node
= -1;
226 for (unsigned i
= 0; i
< l
->node_count
; ++i
) {
227 /* Find spillable nodes */
228 if (l
->class[i
] != l
->spill_class
) continue;
229 if (l
->spill_cost
[i
] < 0) continue;
231 /* Adapted from Chaitin's heuristic */
232 float constraints
= lcra_count_constraints(l
, i
);
233 float cost
= (l
->spill_cost
[i
] + 1);
234 float benefit
= constraints
/ cost
;
236 if (benefit
> best_benefit
) {
237 best_benefit
= benefit
;