/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "util/ralloc.h"
25 #include "util/register_allocate.h"
26 #include "vc4_context.h"
30 #define QPU_R(file, index) { QPU_MUX_##file, index }
32 static const struct qpu_reg vc4_regs
[] = {
104 #define A_INDEX (ACC_INDEX + 5)
105 #define B_INDEX (A_INDEX + 32)
108 vc4_alloc_reg_set(struct vc4_context
*vc4
)
110 assert(vc4_regs
[A_INDEX
].addr
== 0);
111 assert(vc4_regs
[B_INDEX
].addr
== 0);
112 STATIC_ASSERT(ARRAY_SIZE(vc4_regs
) == B_INDEX
+ 32);
117 vc4
->regs
= ra_alloc_reg_set(vc4
, ARRAY_SIZE(vc4_regs
));
119 vc4
->reg_class_any
= ra_alloc_reg_class(vc4
->regs
);
120 for (uint32_t i
= 0; i
< ARRAY_SIZE(vc4_regs
); i
++) {
121 /* Reserve r3 for now, since we're using it for spilling-like
122 * operations in vc4_qpu_emit.c
124 if (vc4_regs
[i
].mux
== QPU_MUX_R3
)
127 /* R4 can't be written as a general purpose register. (it's
128 * TMU_NOSWAP as a write address).
130 if (vc4_regs
[i
].mux
== QPU_MUX_R4
)
133 ra_class_add_reg(vc4
->regs
, vc4
->reg_class_any
, i
);
136 vc4
->reg_class_a
= ra_alloc_reg_class(vc4
->regs
);
137 for (uint32_t i
= A_INDEX
; i
< A_INDEX
+ 32; i
++)
138 ra_class_add_reg(vc4
->regs
, vc4
->reg_class_a
, i
);
140 ra_set_finalize(vc4
->regs
, NULL
);
144 * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
146 * The return value should be freed by the caller.
149 vc4_register_allocate(struct vc4_context
*vc4
, struct vc4_compile
*c
)
151 struct simple_node
*node
;
152 uint32_t def
[c
->num_temps
];
153 uint32_t use
[c
->num_temps
];
154 struct qpu_reg
*temp_registers
= calloc(c
->num_temps
,
155 sizeof(*temp_registers
));
156 memset(def
, 0, sizeof(def
));
157 memset(use
, 0, sizeof(use
));
159 /* If things aren't ever written (undefined values), just read from
162 for (uint32_t i
= 0; i
< c
->num_temps
; i
++)
163 temp_registers
[i
] = qpu_rn(0);
165 vc4_alloc_reg_set(vc4
);
167 struct ra_graph
*g
= ra_alloc_interference_graph(vc4
->regs
,
170 for (uint32_t i
= 0; i
< c
->num_temps
; i
++)
171 ra_set_node_class(g
, i
, vc4
->reg_class_any
);
173 /* Compute the live ranges so we can figure out interference, and
174 * figure out our register classes and preallocated registers.
177 foreach(node
, &c
->instructions
) {
178 struct qinst
*inst
= (struct qinst
*)node
;
180 if (inst
->dst
.file
== QFILE_TEMP
) {
181 def
[inst
->dst
.index
] = ip
;
182 use
[inst
->dst
.index
] = ip
;
185 for (int i
= 0; i
< qir_get_op_nsrc(inst
->op
); i
++) {
186 if (inst
->src
[i
].file
== QFILE_TEMP
)
187 use
[inst
->src
[i
].index
] = ip
;
192 def
[inst
->dst
.index
] = 0;
193 ra_set_node_reg(g
, inst
->dst
.index
,
194 B_INDEX
+ QPU_R_FRAG_PAYLOAD_ZW
);
198 def
[inst
->dst
.index
] = 0;
199 ra_set_node_reg(g
, inst
->dst
.index
,
200 A_INDEX
+ QPU_R_FRAG_PAYLOAD_ZW
);
204 case QOP_TLB_COLOR_READ
:
205 assert(vc4_regs
[ACC_INDEX
+ 4].mux
== QPU_MUX_R4
);
206 ra_set_node_reg(g
, inst
->dst
.index
,
210 case QOP_PACK_SCALED
:
211 /* The pack flags require an A-file dst register. */
212 ra_set_node_class(g
, inst
->dst
.index
, vc4
->reg_class_a
);
219 /* The unpack flags require an A-file src register. */
220 ra_set_node_class(g
, inst
->src
[0].index
, vc4
->reg_class_a
);
230 for (uint32_t i
= 0; i
< c
->num_temps
; i
++) {
231 for (uint32_t j
= i
+ 1; j
< c
->num_temps
; j
++) {
232 if (!(def
[i
] >= use
[j
] || def
[j
] >= use
[i
]))
233 ra_add_node_interference(g
, i
, j
);
237 bool ok
= ra_allocate(g
);
240 for (uint32_t i
= 0; i
< c
->num_temps
; i
++) {
241 temp_registers
[i
] = vc4_regs
[ra_get_node_reg(g
, i
)];
243 /* If the value's never used, just write to the NOP register
244 * for clarity in debug output.
246 if (def
[i
] == use
[i
])
247 temp_registers
[i
] = qpu_ra(QPU_W_NOP
);
252 return temp_registers
;