2 * Copyright (C) 2020 Collabora Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
28 #include "panfrost/util/lcra.h"
29 #include "util/u_memory.h"
/* Adds interference constraints to `l` based on per-block liveness.
 *
 * Calls bi_compute_liveness(ctx), then for every block obtains `live` from
 * mem_dup() of the block's live_out (l->node_count uint16_t entries) and
 * visits the block's instructions with bi_foreach_instr_in_block_rev.
 * For an instruction whose dest is non-zero and below l->node_count,
 * lcra_add_node_interference() is called for nodes 1 .. node_count - 1 with
 * the destination writemask and live[i]; bi_liveness_ins_update() is then
 * applied to `live` for that instruction.
 *
 * NOTE(review): this excerpt is missing lines (return type, braces, and
 * whatever sits between the inner `for` and the interference call). No
 * release of `live` is visible here -- confirm it is freed once per block. */
32 bi_compute_interference(bi_context
*ctx
, struct lcra_state
*l
)
34 bi_compute_liveness(ctx
);
36 bi_foreach_block(ctx
, _blk
) {
37 bi_block
*blk
= (bi_block
*) _blk
;
/* Working liveness array for this block, seeded from live_out; presumably a
 * private copy so the block's own live_out is left untouched -- confirm
 * mem_dup semantics. */
38 uint16_t *live
= mem_dup(_blk
->live_out
, l
->node_count
* sizeof(uint16_t));
40 bi_foreach_instr_in_block_rev(blk
, ins
) {
41 /* Mark all registers live after the instruction as
42 * interfering with the destination */
44 if (ins
->dest
&& (ins
->dest
< l
->node_count
)) {
/* Index 0 is skipped, matching the non-zero dest test above. */
45 for (unsigned i
= 1; i
< l
->node_count
; ++i
) {
47 lcra_add_node_interference(l
, ins
->dest
, ins
->writemask
, i
, live
[i
]);
/* Step `live` across this instruction before visiting the next one. */
52 bi_liveness_ins_update(live
, ins
, l
->node_count
);
/* Register class index 0. It is used by bi_allocate_registers to index
 * class_start[] / class_size[] and as the value stored in class[dest].
 * NOTE(review): the enclosing enum header is not visible in this excerpt. */
60 BI_REG_CLASS_WORK
= 0,
/* Sets up an LCRA problem for `ctx` and attempts to solve it.
 *
 * node_count comes from bi_max_temp(ctx). The state is created with
 * lcra_alloc_equations(node_count, 1, 8, 16, 1), and the single class
 * BI_REG_CLASS_WORK is given start 0 and size 64 * 4. Every instruction with
 * a non-zero dest below node_count has its dest assigned to that class,
 * with lcra_set_alignment(l, dest, 2) and lcra_restrict_range(l, dest, 4).
 * Interference is then added by bi_compute_interference() and the result of
 * lcra_solve() is written to *success.
 *
 * Declared to return a struct lcra_state pointer.
 * NOTE(review): the return statement and the body of the dest filter are not
 * visible in this excerpt; presumably `l` is returned and ownership passes
 * to the caller -- confirm. The meaning of the literal arguments
 * (1, 8, 16, 1) should be checked against the lcra_alloc_equations
 * declaration. */
63 static struct lcra_state
*
64 bi_allocate_registers(bi_context
*ctx
, bool *success
)
66 unsigned node_count
= bi_max_temp(ctx
);
68 struct lcra_state
*l
=
69 lcra_alloc_equations(node_count
, 1, 8, 16, 1);
71 l
->class_start
[BI_REG_CLASS_WORK
] = 0;
72 l
->class_size
[BI_REG_CLASS_WORK
] = 64 * 4; /* R0 - R63, all 32-bit */
74 bi_foreach_instr_global(ctx
, ins
) {
75 unsigned dest
= ins
->dest
;
/* Only non-zero destinations inside the node range are constrained. */
77 if (!dest
|| (dest
>= node_count
))
80 l
->class[dest
] = BI_REG_CLASS_WORK
;
81 lcra_set_alignment(l
, dest
, 2); /* 2^2 = 4 */
82 lcra_restrict_range(l
, dest
, 4);
85 bi_compute_interference(ctx
, l
);
/* Report solver outcome to the caller through the out-parameter. */
87 *success
= lcra_solve(l
);
/* Translates a node index into a register index using the LCRA solution.
 *
 * Reads l->solutions[index], asserts that its low two bits are clear,
 * divides it by 4 to get a register number and returns
 * BIR_INDEX_REGISTER | reg.
 *
 * NOTE(review): lines are missing from this excerpt. The actions taken when
 * index >= l->node_count, and for an unsolved index, are not visible, and
 * `offset` is not used in any visible line -- presumably it is applied to
 * `solution` before the alignment assert; confirm against the full file. */
93 bi_reg_from_index(struct lcra_state
*l
, unsigned index
, unsigned offset
)
95 /* Did we run RA for this index at all */
96 if (index
>= l
->node_count
)
99 /* LCRA didn't bother solving this index (how lazy!) */
100 signed solution
= l
->solutions
[index
];
/* The solution must be a multiple of 4 so the division below is exact. */
106 assert((solution
& 0x3) == 0);
107 unsigned reg
= solution
/ 4;
108 return BIR_INDEX_REGISTER
| reg
;
/* Rewrites source `src` of `ins` to the register chosen by the allocator.
 *
 * `vector` is true when the instruction's class has BI_VECTOR set and src is
 * 0. The visible swizzle handling takes the source type size from
 * nir_alu_type_get_type_size(), derives comps_per_reg = 32 / size, and for
 * each component reported by bi_get_component_count() computes
 * off = (swizzle / comps_per_reg) * 4, asserts it equals `offset`, and
 * reduces the swizzle modulo comps_per_reg. Finally ins->src[src] is
 * replaced by bi_reg_from_index(l, ins->src[src], offset).
 *
 * NOTE(review): this excerpt is missing lines -- the declaration and first
 * assignment of `offset`, the branch on `vector`, and the bodies of the
 * early-out tests are not visible. Also, the only visible guard on `size`
 * is `size <= 32`; a size of 0 would divide by zero in `32 / size` --
 * confirm that cannot occur. */
112 bi_adjust_src_ra(bi_instruction
*ins
, struct lcra_state
*l
, unsigned src
)
114 if (ins
->src
[src
] >= l
->node_count
)
117 bool vector
= (bi_class_props
[ins
->type
] & BI_VECTOR
) && src
== 0;
121 /* TODO: Do we do anything here? */
123 /* Use the swizzle as component select */
124 nir_alu_type T
= ins
->src_types
[src
];
125 unsigned size
= nir_alu_type_get_type_size(T
);
126 assert(size
<= 32); /* TODO: 64-bit */
127 unsigned comps_per_reg
= 32 / size
;
128 unsigned components
= bi_get_component_count(ins
, src
);
130 for (unsigned i
= 0; i
< components
; ++i
) {
131 /* If we're not writing the component, who cares? */
132 if (!bi_writes_component(ins
, i
))
135 unsigned off
= ins
->swizzle
[src
][i
] / comps_per_reg
;
136 off
*= 4; /* 32-bit registers */
138 /* We can't cross register boundaries in a swizzle */
142 assert(off
== offset
);
/* Keep only the within-register part of the swizzle. */
144 ins
->swizzle
[src
][i
] %= comps_per_reg
;
148 ins
->src
[src
] = bi_reg_from_index(l
, ins
->src
[src
], offset
);
/* Rewrites the destination of `ins` to the register chosen by the allocator.
 *
 * `vector` is true when the instruction's class has BI_VECTOR set. The
 * visible code counts trailing zero bits of ins->writemask with
 * __builtin_ctz(), converts that count to whole registers (tz / 4), shifts
 * the writemask right by `offset`, and replaces ins->dest with
 * bi_reg_from_index(l, ins->dest, offset).
 *
 * NOTE(review): this excerpt is missing lines -- the declaration and
 * assignment of `offset`, the branch on `vector`, and the body of the
 * early-out test are not visible. __builtin_ctz() has an undefined result
 * for an argument of 0, so confirm the writemask is non-zero on every path
 * that reaches the call. */
152 bi_adjust_dest_ra(bi_instruction
*ins
, struct lcra_state
*l
)
154 if (ins
->dest
>= l
->node_count
)
157 bool vector
= (bi_class_props
[ins
->type
] & BI_VECTOR
);
161 /* Look at the writemask to get an offset, specifically the
164 unsigned tz
= __builtin_ctz(ins
->writemask
);
166 /* Recall writemask is one bit per byte, so tz is in bytes */
167 unsigned regs
= tz
/ 4;
170 /* Adjust writemask to compensate */
171 ins
->writemask
>>= offset
;
174 ins
->dest
= bi_reg_from_index(l
, ins
->dest
, offset
);
/* Applies the allocation in `l` to every instruction in `ctx`: for each
 * instruction the destination is rewritten by bi_adjust_dest_ra(), then each
 * source enumerated by bi_foreach_src() is rewritten by bi_adjust_src_ra().
 * NOTE(review): return type and braces are not visible in this excerpt. */
179 bi_install_registers(bi_context
*ctx
, struct lcra_state
*l
)
181 bi_foreach_instr_global(ctx
, ins
) {
182 bi_adjust_dest_ra(ins
, l
);
184 bi_foreach_src(ins
, s
)
185 bi_adjust_src_ra(ins
, l
, s
);
/* Entry point for register allocation on `ctx`.
 *
 * The visible steps are: bi_invalidate_liveness(ctx), then
 * bi_allocate_registers(ctx, &success) to obtain the solver state, then
 * bi_install_registers(ctx, l) to rewrite the instructions.
 *
 * NOTE(review): lines are missing between these steps and possibly after the
 * last one. No test of `success` and no release of `l` is visible in this
 * excerpt -- confirm that solver failure is handled before registers are
 * installed and that the state is freed. */
190 bi_register_allocate(bi_context
*ctx
)
192 struct lcra_state
*l
= NULL
;
193 bool success
= false;
201 bi_invalidate_liveness(ctx
);
202 l
= bi_allocate_registers(ctx
, &success
);
208 bi_install_registers(ctx
, l
);