/*
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
#include "etnaviv_compiler_nir.h"
#include "util/register_allocate.h"
/* Use "r63.z" for the depth register; it will wrap around to r0.z by
 * reg_get_base (fs registers are offset by 1 to avoid reserving r0).
 */
#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z)
/* Precomputed by register_allocate.
 *
 * A 7x7 table that is handed unchanged to ra_set_finalize() by
 * etna_ra_setup().
 * NOTE(review): presumably one row and one column per register class
 * (NUM_REG_CLASSES == 7) -- confirm against the class enum.
 */
static unsigned int *q_values[] = {
   (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
   (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
   (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
   (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
   (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
   (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
   (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
};
46 static inline int reg_get_class(int virt_reg
)
48 switch (reg_get_type(virt_reg
)) {
50 return REG_CLASS_VEC4
;
51 case REG_TYPE_VIRT_VEC3_XYZ
:
52 case REG_TYPE_VIRT_VEC3_XYW
:
53 case REG_TYPE_VIRT_VEC3_XZW
:
54 case REG_TYPE_VIRT_VEC3_YZW
:
55 return REG_CLASS_VIRT_VEC3
;
56 case REG_TYPE_VIRT_VEC2_XY
:
57 case REG_TYPE_VIRT_VEC2_XZ
:
58 case REG_TYPE_VIRT_VEC2_XW
:
59 case REG_TYPE_VIRT_VEC2_YZ
:
60 case REG_TYPE_VIRT_VEC2_YW
:
61 case REG_TYPE_VIRT_VEC2_ZW
:
62 return REG_CLASS_VIRT_VEC2
;
63 case REG_TYPE_VIRT_SCALAR_X
:
64 case REG_TYPE_VIRT_SCALAR_Y
:
65 case REG_TYPE_VIRT_SCALAR_Z
:
66 case REG_TYPE_VIRT_SCALAR_W
:
67 return REG_CLASS_VIRT_SCALAR
;
68 case REG_TYPE_VIRT_VEC2T_XY
:
69 case REG_TYPE_VIRT_VEC2T_ZW
:
70 return REG_CLASS_VIRT_VEC2T
;
71 case REG_TYPE_VIRT_VEC2C_XY
:
72 case REG_TYPE_VIRT_VEC2C_YZ
:
73 case REG_TYPE_VIRT_VEC2C_ZW
:
74 return REG_CLASS_VIRT_VEC2C
;
75 case REG_TYPE_VIRT_VEC3C_XYZ
:
76 case REG_TYPE_VIRT_VEC3C_YZW
:
77 return REG_CLASS_VIRT_VEC3C
;
85 etna_ra_setup(void *mem_ctx
)
87 struct ra_regs
*regs
= ra_alloc_reg_set(mem_ctx
, ETNA_MAX_TEMPS
*
88 NUM_REG_TYPES
, false);
90 /* classes always be created from index 0, so equal to the class enum
91 * which represents a register with (c+1) components
93 for (int c
= 0; c
< NUM_REG_CLASSES
; c
++)
94 ra_alloc_reg_class(regs
);
95 /* add each register of each class */
96 for (int r
= 0; r
< NUM_REG_TYPES
* ETNA_MAX_TEMPS
; r
++)
97 ra_class_add_reg(regs
, reg_get_class(r
), r
);
99 for (int r
= 0; r
< ETNA_MAX_TEMPS
; r
++) {
100 for (int i
= 0; i
< NUM_REG_TYPES
; i
++) {
101 for (int j
= 0; j
< i
; j
++) {
102 if (reg_writemask
[i
] & reg_writemask
[j
]) {
103 ra_add_reg_conflict(regs
, NUM_REG_TYPES
* r
+ i
,
104 NUM_REG_TYPES
* r
+ j
);
109 ra_set_finalize(regs
, q_values
);
115 etna_ra_assign(struct etna_compile
*c
, nir_shader
*shader
)
117 struct etna_compiler
*compiler
= c
->variant
->shader
->compiler
;
118 struct ra_regs
*regs
= compiler
->regs
;
120 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
122 /* liveness and interference */
124 nir_index_blocks(impl
);
125 nir_index_ssa_defs(impl
);
126 nir_foreach_block(block
, impl
) {
127 nir_foreach_instr(instr
, block
)
128 instr
->pass_flags
= 0;
131 /* this gives an approximation/upper limit on how many nodes are needed
132 * (some ssa values do not represent an allocated register)
134 unsigned max_nodes
= impl
->ssa_alloc
+ impl
->reg_alloc
;
135 unsigned *live_map
= ralloc_array(NULL
, unsigned, max_nodes
);
136 memset(live_map
, 0xff, sizeof(unsigned) * max_nodes
);
137 struct live_def
*defs
= rzalloc_array(NULL
, struct live_def
, max_nodes
);
139 unsigned num_nodes
= etna_live_defs(impl
, defs
, live_map
);
140 struct ra_graph
*g
= ra_alloc_interference_graph(regs
, num_nodes
);
142 /* set classes from num_components */
143 for (unsigned i
= 0; i
< num_nodes
; i
++) {
144 nir_instr
*instr
= defs
[i
].instr
;
145 nir_dest
*dest
= defs
[i
].dest
;
146 unsigned comp
= nir_dest_num_components(*dest
) - 1;
148 if (instr
->type
== nir_instr_type_alu
&&
149 c
->specs
->has_new_transcendentals
) {
150 switch (nir_instr_as_alu(instr
)->op
) {
155 assert(dest
->is_ssa
);
156 comp
= REG_CLASS_VIRT_VEC2T
;
162 if (instr
->type
== nir_instr_type_intrinsic
) {
163 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
164 /* can't have dst swizzle or sparse writemask on UBO loads */
165 if (intr
->intrinsic
== nir_intrinsic_load_ubo
) {
166 assert(dest
== &intr
->dest
);
167 if (dest
->ssa
.num_components
== 2)
168 comp
= REG_CLASS_VIRT_VEC2C
;
169 if (dest
->ssa
.num_components
== 3)
170 comp
= REG_CLASS_VIRT_VEC3C
;
174 ra_set_node_class(g
, i
, comp
);
177 nir_foreach_block(block
, impl
) {
178 nir_foreach_instr(instr
, block
) {
179 if (instr
->type
!= nir_instr_type_intrinsic
)
182 nir_dest
*dest
= dest_for_instr(instr
);
183 nir_intrinsic_instr
*intr
= nir_instr_as_intrinsic(instr
);
186 switch (intr
->intrinsic
) {
187 case nir_intrinsic_store_deref
: {
188 /* don't want outputs to be swizzled
189 * TODO: better would be to set the type to X/XY/XYZ/XYZW
190 * TODO: what if fragcoord.z is read after writing fragdepth?
192 nir_deref_instr
*deref
= nir_src_as_deref(intr
->src
[0]);
193 unsigned index
= live_map
[src_index(impl
, &intr
->src
[1])];
195 if (shader
->info
.stage
== MESA_SHADER_FRAGMENT
&&
196 deref
->var
->data
.location
== FRAG_RESULT_DEPTH
) {
197 ra_set_node_reg(g
, index
, REG_FRAG_DEPTH
);
199 ra_set_node_class(g
, index
, REG_CLASS_VEC4
);
202 case nir_intrinsic_load_input
:
203 reg
= nir_intrinsic_base(intr
) * NUM_REG_TYPES
+ (unsigned[]) {
204 REG_TYPE_VIRT_SCALAR_X
,
205 REG_TYPE_VIRT_VEC2_XY
,
206 REG_TYPE_VIRT_VEC3_XYZ
,
208 }[nir_dest_num_components(*dest
) - 1];
210 case nir_intrinsic_load_instance_id
:
211 reg
= c
->variant
->infile
.num_reg
* NUM_REG_TYPES
+ REG_TYPE_VIRT_SCALAR_Y
;
217 ra_set_node_reg(g
, live_map
[dest_index(impl
, dest
)], reg
);
221 /* add interference for intersecting live ranges */
222 for (unsigned i
= 0; i
< num_nodes
; i
++) {
223 assert(defs
[i
].live_start
< defs
[i
].live_end
);
224 for (unsigned j
= 0; j
< i
; j
++) {
225 if (defs
[i
].live_start
>= defs
[j
].live_end
|| defs
[j
].live_start
>= defs
[i
].live_end
)
227 ra_add_node_interference(g
, i
, j
);
233 /* Allocate registers */
234 ASSERTED
bool ok
= ra_allocate(g
);
238 c
->live_map
= live_map
;
239 c
->num_nodes
= num_nodes
;
243 etna_ra_finish(struct etna_compile
*c
)
245 /* TODO: better way to get number of registers used? */
247 for (unsigned i
= 0; i
< c
->num_nodes
; i
++) {
248 j
= MAX2(j
, reg_get_base(c
, ra_get_node_reg(c
->g
, i
)) + 1);
252 ralloc_free(c
->live_map
);