vc4: Introduce scheduling of QPU instructions.
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include "vc4_qir.h"
26 #include "vc4_qpu.h"
27
28 static uint64_t
29 set_src_raddr(uint64_t inst, struct qpu_reg src)
30 {
31 if (src.mux == QPU_MUX_A) {
32 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
33 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
34 return ((inst & ~QPU_RADDR_A_MASK) |
35 QPU_SET_FIELD(src.addr, QPU_RADDR_A));
36 }
37
38 if (src.mux == QPU_MUX_B) {
39 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
40 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
41 return ((inst & ~QPU_RADDR_B_MASK) |
42 QPU_SET_FIELD(src.addr, QPU_RADDR_B));
43 }
44
45 return inst;
46 }
47
48 uint64_t
49 qpu_NOP()
50 {
51 uint64_t inst = 0;
52
53 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
54 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
55
56 /* Note: These field values are actually non-zero */
57 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
58 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
59 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
60 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
61 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
62
63 return inst;
64 }
65
66 static uint64_t
67 qpu_a_dst(struct qpu_reg dst)
68 {
69 uint64_t inst = 0;
70
71 if (dst.mux <= QPU_MUX_R5) {
72 /* Translate the mux to the ACCn values. */
73 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
74 } else {
75 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
76 if (dst.mux == QPU_MUX_B)
77 inst |= QPU_WS;
78 }
79
80 return inst;
81 }
82
83 static uint64_t
84 qpu_m_dst(struct qpu_reg dst)
85 {
86 uint64_t inst = 0;
87
88 if (dst.mux <= QPU_MUX_R5) {
89 /* Translate the mux to the ACCn values. */
90 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
91 } else {
92 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
93 if (dst.mux == QPU_MUX_A)
94 inst |= QPU_WS;
95 }
96
97 return inst;
98 }
99
100 uint64_t
101 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
102 {
103 uint64_t inst = 0;
104
105 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
106 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
107 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
108 inst |= qpu_a_dst(dst);
109 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
110 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A);
111 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B);
112 inst = set_src_raddr(inst, src);
113 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
114 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
115
116 return inst;
117 }
118
119 uint64_t
120 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
121 {
122 uint64_t inst = 0;
123
124 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
125 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
126 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
127 inst |= qpu_m_dst(dst);
128 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
129 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A);
130 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B);
131 inst = set_src_raddr(inst, src);
132 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
133 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
134
135 return inst;
136 }
137
138 uint64_t
139 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
140 {
141 uint64_t inst = 0;
142
143 inst |= qpu_a_dst(dst);
144 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
145 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
146 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
147 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
148 inst |= val;
149
150 return inst;
151 }
152
153 uint64_t
154 qpu_a_alu2(enum qpu_op_add op,
155 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
156 {
157 uint64_t inst = 0;
158
159 inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
160 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
161 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
162 inst |= qpu_a_dst(dst);
163 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
164 inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A);
165 inst = set_src_raddr(inst, src0);
166 inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B);
167 inst = set_src_raddr(inst, src1);
168 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
169 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
170
171 return inst;
172 }
173
174 uint64_t
175 qpu_m_alu2(enum qpu_op_mul op,
176 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
177 {
178 uint64_t inst = 0;
179
180 inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
181 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
182 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
183 inst |= qpu_m_dst(dst);
184 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
185 inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A);
186 inst = set_src_raddr(inst, src0);
187 inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B);
188 inst = set_src_raddr(inst, src1);
189 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
190 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
191
192 return inst;
193 }
194
/*
 * Resolves one field that is shared between two half-instructions.
 *
 * merge:  the combined instruction word built so far.
 * add:    the add-ALU half-instruction.
 * mul:    the mul-ALU half-instruction.
 * mask:   bit mask selecting the shared field.
 * ignore: the in-place field value meaning "this half does not care".
 *
 * If the add half holds the ignore value, the field is taken from the mul
 * half; otherwise, if the mul half holds the ignore value, it is taken from
 * the add half.  When neither half is idle the two must agree (asserted),
 * and merge is returned unchanged.
 */
static uint64_t
merge_fields(uint64_t merge,
             uint64_t add, uint64_t mul,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t add_bits = add & mask;
        const uint64_t mul_bits = mul & mask;
        const uint64_t outside = merge & ~mask;

        if (add_bits == ignore)
                return outside | mul_bits;

        if (mul_bits == ignore)
                return outside | add_bits;

        /* Both halves use the field, so they have to want the same thing. */
        assert(add_bits == mul_bits);
        return merge;
}
209
210 uint64_t
211 qpu_inst(uint64_t add, uint64_t mul)
212 {
213 uint64_t merge = ((add & ~QPU_WADDR_MUL_MASK) |
214 (mul & ~QPU_WADDR_ADD_MASK));
215
216 merge = merge_fields(merge, add, mul, QPU_SIG_MASK,
217 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
218
219 merge = merge_fields(merge, add, mul, QPU_RADDR_A_MASK,
220 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
221 merge = merge_fields(merge, add, mul, QPU_RADDR_B_MASK,
222 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
223
224 return merge;
225 }
226
227 uint64_t
228 qpu_set_sig(uint64_t inst, uint32_t sig)
229 {
230 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
231 return (inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(sig, QPU_SIG);
232 }
233
234 uint64_t
235 qpu_set_cond_add(uint64_t inst, uint32_t sig)
236 {
237 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
238 return (inst & ~QPU_COND_ADD_MASK) | QPU_SET_FIELD(sig, QPU_COND_ADD);
239 }
240
241 uint64_t
242 qpu_set_cond_mul(uint64_t inst, uint32_t sig)
243 {
244 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
245 return (inst & ~QPU_COND_MUL_MASK) | QPU_SET_FIELD(sig, QPU_COND_MUL);
246 }
247
248 bool
249 qpu_waddr_is_tlb(uint32_t waddr)
250 {
251 switch (waddr) {
252 case QPU_W_TLB_COLOR_ALL:
253 case QPU_W_TLB_COLOR_MS:
254 case QPU_W_TLB_Z:
255 return true;
256 default:
257 return false;
258 }
259 }
260
261 bool
262 qpu_inst_is_tlb(uint64_t inst)
263 {
264 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
265
266 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
267 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
268 sig == QPU_SIG_COLOR_LOAD ||
269 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
270 }
271
272 void
273 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
274 {
275 if (c->qpu_inst_count >= c->qpu_inst_size) {
276 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
277 c->qpu_insts = realloc(c->qpu_insts,
278 c->qpu_inst_size * sizeof(uint64_t));
279 }
280 c->qpu_insts[c->qpu_inst_count++] = inst;
281 }