/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
27 #include "util/u_math.h"
28 #include "util/register_allocate.h"
29 #include "util/ralloc.h"
30 #include "util/bitset.h"
33 #include "ir3_compiler.h"
37 setup_conflicts(struct ir3_ra_reg_set
*set
)
42 for (unsigned i
= 0; i
< class_count
; i
++) {
43 for (unsigned j
= 0; j
< CLASS_REGS(i
); j
++) {
44 for (unsigned br
= j
; br
< j
+ class_sizes
[i
]; br
++) {
45 ra_add_transitive_reg_conflict(set
->regs
, br
, reg
);
52 for (unsigned i
= 0; i
< half_class_count
; i
++) {
53 for (unsigned j
= 0; j
< HALF_CLASS_REGS(i
); j
++) {
54 for (unsigned br
= j
; br
< j
+ half_class_sizes
[i
]; br
++) {
55 ra_add_transitive_reg_conflict(set
->regs
,
56 br
+ set
->first_half_reg
, reg
);
63 for (unsigned i
= 0; i
< high_class_count
; i
++) {
64 for (unsigned j
= 0; j
< HIGH_CLASS_REGS(i
); j
++) {
65 for (unsigned br
= j
; br
< j
+ high_class_sizes
[i
]; br
++) {
66 ra_add_transitive_reg_conflict(set
->regs
,
67 br
+ set
->first_high_reg
, reg
);
75 * Setup conflicts with registers over 0x3f for the special vreg
76 * that exists to use as interference for tex-prefetch:
79 for (unsigned i
= 0x40; i
< CLASS_REGS(0); i
++) {
80 ra_add_transitive_reg_conflict(set
->regs
, i
,
81 set
->prefetch_exclude_reg
);
84 for (unsigned i
= 0x40; i
< HALF_CLASS_REGS(0); i
++) {
85 ra_add_transitive_reg_conflict(set
->regs
, i
+ set
->first_half_reg
,
86 set
->prefetch_exclude_reg
);
90 /* One-time setup of RA register-set, which describes all the possible
91 * "virtual" registers and their interferences. Ie. double register
92 * occupies (and conflicts with) two single registers, and so forth.
93 * Since registers do not need to be aligned to their class size, they
94 * can conflict with other registers in the same class too. Ie:
96 * Single (base) | Double
97 * --------------+---------------
104 * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
105 * really just four scalar registers. Don't let that confuse you.)
107 struct ir3_ra_reg_set
*
108 ir3_ra_alloc_reg_set(struct ir3_compiler
*compiler
, bool mergedregs
)
110 struct ir3_ra_reg_set
*set
= rzalloc(compiler
, struct ir3_ra_reg_set
);
111 unsigned ra_reg_count
, reg
, base
;
113 /* calculate # of regs across all classes: */
115 for (unsigned i
= 0; i
< class_count
; i
++)
116 ra_reg_count
+= CLASS_REGS(i
);
117 for (unsigned i
= 0; i
< half_class_count
; i
++)
118 ra_reg_count
+= HALF_CLASS_REGS(i
);
119 for (unsigned i
= 0; i
< high_class_count
; i
++)
120 ra_reg_count
+= HIGH_CLASS_REGS(i
);
122 ra_reg_count
+= 1; /* for tex-prefetch excludes */
124 /* allocate the reg-set.. */
125 set
->regs
= ra_alloc_reg_set(set
, ra_reg_count
, true);
126 set
->ra_reg_to_gpr
= ralloc_array(set
, uint16_t, ra_reg_count
);
127 set
->gpr_to_ra_reg
= ralloc_array(set
, uint16_t *, total_class_count
);
131 for (unsigned i
= 0; i
< class_count
; i
++) {
132 set
->classes
[i
] = ra_alloc_reg_class(set
->regs
);
134 set
->gpr_to_ra_reg
[i
] = ralloc_array(set
, uint16_t, CLASS_REGS(i
));
136 for (unsigned j
= 0; j
< CLASS_REGS(i
); j
++) {
137 ra_class_add_reg(set
->regs
, set
->classes
[i
], reg
);
139 set
->ra_reg_to_gpr
[reg
] = j
;
140 set
->gpr_to_ra_reg
[i
][j
] = reg
;
146 set
->first_half_reg
= reg
;
149 for (unsigned i
= 0; i
< half_class_count
; i
++) {
150 set
->half_classes
[i
] = ra_alloc_reg_class(set
->regs
);
152 set
->gpr_to_ra_reg
[base
+ i
] =
153 ralloc_array(set
, uint16_t, HALF_CLASS_REGS(i
));
155 for (unsigned j
= 0; j
< HALF_CLASS_REGS(i
); j
++) {
156 ra_class_add_reg(set
->regs
, set
->half_classes
[i
], reg
);
158 set
->ra_reg_to_gpr
[reg
] = j
;
159 set
->gpr_to_ra_reg
[base
+ i
][j
] = reg
;
165 set
->first_high_reg
= reg
;
168 for (unsigned i
= 0; i
< high_class_count
; i
++) {
169 set
->high_classes
[i
] = ra_alloc_reg_class(set
->regs
);
171 set
->gpr_to_ra_reg
[base
+ i
] =
172 ralloc_array(set
, uint16_t, HIGH_CLASS_REGS(i
));
174 for (unsigned j
= 0; j
< HIGH_CLASS_REGS(i
); j
++) {
175 ra_class_add_reg(set
->regs
, set
->high_classes
[i
], reg
);
177 set
->ra_reg_to_gpr
[reg
] = j
;
178 set
->gpr_to_ra_reg
[base
+ i
][j
] = reg
;
185 * Setup an additional class, with one vreg, to simply conflict
186 * with registers that are too high to encode tex-prefetch. This
187 * vreg is only used to setup additional conflicts so that RA
188 * knows to allocate prefetch dst regs below the limit:
190 set
->prefetch_exclude_class
= ra_alloc_reg_class(set
->regs
);
191 ra_class_add_reg(set
->regs
, set
->prefetch_exclude_class
, reg
);
192 set
->prefetch_exclude_reg
= reg
++;
195 * And finally setup conflicts. Starting a6xx, half precision regs
196 * conflict w/ full precision regs (when using MERGEDREGS):
199 for (unsigned i
= 0; i
< CLASS_REGS(0) / 2; i
++) {
200 unsigned freg
= set
->gpr_to_ra_reg
[0][i
];
201 unsigned hreg0
= set
->gpr_to_ra_reg
[0 + HALF_OFFSET
][(i
* 2) + 0];
202 unsigned hreg1
= set
->gpr_to_ra_reg
[0 + HALF_OFFSET
][(i
* 2) + 1];
204 ra_add_transitive_reg_pair_conflict(set
->regs
, freg
, hreg0
, hreg1
);
208 setup_conflicts(set
);
210 ra_set_finalize(set
->regs
, NULL
);
216 ra_size_to_class(unsigned sz
, bool half
, bool high
)
219 for (unsigned i
= 0; i
< high_class_count
; i
++)
220 if (high_class_sizes
[i
] >= sz
)
221 return i
+ HIGH_OFFSET
;
223 for (unsigned i
= 0; i
< half_class_count
; i
++)
224 if (half_class_sizes
[i
] >= sz
)
225 return i
+ HALF_OFFSET
;
227 for (unsigned i
= 0; i
< class_count
; i
++)
228 if (class_sizes
[i
] >= sz
)
236 ra_class_to_size(unsigned class, bool *half
, bool *high
)
238 *half
= *high
= false;
240 if (class >= HIGH_OFFSET
) {
242 return high_class_sizes
[class - HIGH_OFFSET
];
243 } else if (class >= HALF_OFFSET
) {
245 return half_class_sizes
[class - HALF_OFFSET
];
247 return class_sizes
[class];