/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 writes_reg(uint64_t inst
, uint32_t w
)
29 return (QPU_GET_FIELD(inst
, QPU_WADDR_ADD
) == w
||
30 QPU_GET_FIELD(inst
, QPU_WADDR_MUL
) == w
);
34 _reads_reg(uint64_t inst
, uint32_t r
, bool ignore_a
, bool ignore_b
)
39 { QPU_GET_FIELD(inst
, QPU_ADD_A
) },
40 { QPU_GET_FIELD(inst
, QPU_ADD_B
) },
41 { QPU_GET_FIELD(inst
, QPU_MUL_A
) },
42 { QPU_GET_FIELD(inst
, QPU_MUL_B
) },
45 for (int i
= 0; i
< ARRAY_SIZE(src_regs
); i
++) {
47 src_regs
[i
].mux
== QPU_MUX_A
&&
48 (QPU_GET_FIELD(inst
, QPU_RADDR_A
) == r
))
52 src_regs
[i
].mux
== QPU_MUX_B
&&
53 (QPU_GET_FIELD(inst
, QPU_RADDR_B
) == r
))
/**
 * Tells whether an instruction reads address @r, with neither the A nor
 * the B side excluded from the search.
 *
 * @param inst  encoded instruction word
 * @param r     read address to look for
 * @return the result of _reads_reg() with both ignore flags cleared
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
/**
 * Tells whether an instruction reads address @r on the A side only.
 *
 * Forwards to _reads_reg() with ignore_a cleared and ignore_b set.
 *
 * @param inst  encoded instruction word
 * @param r     read address to look for
 * @return the result of _reads_reg() with the B side ignored
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = true;

        return _reads_reg(inst, r, skip_a, skip_b);
}
/**
 * Tells whether an instruction reads address @r on the B side only.
 *
 * Forwards to _reads_reg() with ignore_a set and ignore_b cleared.
 *
 * @param inst  encoded instruction word
 * @param r     read address to look for
 * @return the result of _reads_reg() with the A side ignored
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = true;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
79 writes_sfu(uint64_t inst
)
81 return (writes_reg(inst
, QPU_W_SFU_RECIP
) ||
82 writes_reg(inst
, QPU_W_SFU_RECIPSQRT
) ||
83 writes_reg(inst
, QPU_W_SFU_EXP
) ||
84 writes_reg(inst
, QPU_W_SFU_LOG
));
/**
 * Checks for the instruction restrictions from page 37 ("Summary of
 * Instruction Restrictions").
 */
92 vc4_qpu_validate(uint64_t *insts
, uint32_t num_inst
)
94 bool scoreboard_locked
= false;
96 for (int i
= 0; i
< num_inst
; i
++) {
97 uint64_t inst
= insts
[i
];
99 if (QPU_GET_FIELD(inst
, QPU_SIG
) != QPU_SIG_PROG_END
) {
100 if (qpu_inst_is_tlb(inst
))
101 scoreboard_locked
= true;
106 /* "The Thread End instruction must not write to either physical
109 assert(QPU_GET_FIELD(inst
, QPU_WADDR_ADD
) >= 32);
110 assert(QPU_GET_FIELD(inst
, QPU_WADDR_MUL
) >= 32);
112 /* Can't trigger an implicit wait on scoreboard in the program
115 assert(!qpu_inst_is_tlb(inst
) || scoreboard_locked
);
117 /* Two delay slots will be executed. */
118 assert(i
+ 2 <= num_inst
);
120 for (int j
= i
; j
< i
+ 2; j
++) {
121 /* "The last three instructions of any program
122 * (Thread End plus the following two delay-slot
123 * instructions) must not do varyings read, uniforms
124 * read or any kind of VPM, VDR, or VDW read or
127 assert(!writes_reg(insts
[j
], QPU_W_VPM
));
128 assert(!reads_reg(insts
[j
], QPU_R_VARY
));
129 assert(!reads_reg(insts
[j
], QPU_R_UNIF
));
130 assert(!reads_reg(insts
[j
], QPU_R_VPM
));
132 /* "The Thread End instruction and the following two
133 * delay slot instructions must not write or read
134 * address 14 in either regfile A or B."
136 assert(!writes_reg(insts
[j
], 14));
137 assert(!reads_reg(insts
[j
], 14));
141 /* "The final program instruction (the second delay slot
142 * instruction) must not do a TLB Z write."
144 assert(!writes_reg(insts
[i
+ 2], QPU_W_TLB_Z
));
147 /* "A scoreboard wait must not occur in the first two instructions of
148 * a fragment shader. This is either the explicit Wait for Scoreboard
149 * signal or an implicit wait with the first tile-buffer read or
150 * write instruction."
152 for (int i
= 0; i
< 2; i
++) {
153 uint64_t inst
= insts
[i
];
155 assert(!qpu_inst_is_tlb(inst
));
158 /* "If TMU_NOSWAP is written, the write must be three instructions
159 * before the first TMU write instruction. For example, if
160 * TMU_NOSWAP is written in the first shader instruction, the first
161 * TMU write cannot occur before the 4th shader instruction."
163 int last_tmu_noswap
= -10;
164 for (int i
= 0; i
< num_inst
; i
++) {
165 uint64_t inst
= insts
[i
];
167 assert((i
- last_tmu_noswap
) > 3 ||
168 (!writes_reg(inst
, QPU_W_TMU0_S
) &&
169 !writes_reg(inst
, QPU_W_TMU1_S
)));
171 if (writes_reg(inst
, QPU_W_TMU_NOSWAP
))
175 /* "An instruction must not read from a location in physical regfile A
176 * or B that was written to by the previous instruction."
178 for (int i
= 0; i
< num_inst
- 1; i
++) {
179 uint64_t inst
= insts
[i
];
180 uint32_t add_waddr
= QPU_GET_FIELD(inst
, QPU_WADDR_ADD
);
181 uint32_t mul_waddr
= QPU_GET_FIELD(inst
, QPU_WADDR_MUL
);
182 uint32_t waddr_a
, waddr_b
;
192 assert(waddr_a
>= 32 || !reads_a_reg(insts
[i
+ 1], waddr_a
));
193 assert(waddr_b
>= 32 || !reads_b_reg(insts
[i
+ 1], waddr_b
));
196 /* "After an SFU lookup instruction, accumulator r4 must not be read
197 * in the following two instructions. Any other instruction that
198 * results in r4 being written (that is, TMU read, TLB read, SFU
199 * lookup) cannot occur in the two instructions following an SFU
202 int last_sfu_inst
= -10;
203 for (int i
= 0; i
< num_inst
- 1; i
++) {
204 uint64_t inst
= insts
[i
];
206 assert(i
- last_sfu_inst
> 2 ||
207 (!writes_sfu(inst
) &&
208 !writes_reg(inst
, QPU_W_TMU0_S
) &&
209 !writes_reg(inst
, QPU_W_TMU1_S
) &&
210 QPU_GET_FIELD(inst
, QPU_SIG
) != QPU_SIG_COLOR_LOAD
));
212 if (writes_sfu(inst
))
216 int last_r5_write
= -10;
217 for (int i
= 0; i
< num_inst
- 1; i
++) {
218 uint64_t inst
= insts
[i
];
220 /* "An instruction that does a vector rotate by r5 must not
221 * immediately follow an instruction that writes to r5."
223 assert(last_r5_write
!= i
- 1 ||
224 QPU_GET_FIELD(inst
, QPU_SIG
) != QPU_SIG_SMALL_IMM
||
225 QPU_GET_FIELD(inst
, QPU_SMALL_IMM
) != 48);
228 /* "An instruction that does a vector rotate must not immediately
229 * follow an instruction that writes to the accumulator that is being
235 /* "After an instruction that does a TLB Z write, the multisample mask
236 * must not be read as an instruction input argument in the following
237 * two instruction. The TLB Z write instruction can, however, be
238 * followed immediately by a TLB color write."
240 for (int i
= 0; i
< num_inst
- 1; i
++) {
241 uint64_t inst
= insts
[i
];
242 if (writes_reg(inst
, QPU_W_TLB_Z
)) {
243 assert(!reads_a_reg(insts
[i
+ 1], QPU_R_MS_REV_FLAGS
));
244 assert(!reads_a_reg(insts
[i
+ 2], QPU_R_MS_REV_FLAGS
));
249 * "A single instruction can only perform a maximum of one of the
250 * following closely coupled peripheral accesses in a single
251 * instruction: TMU write, TMU read, TLB write, TLB read, TLB
252 * combined color read and write, SFU write, Mutex read or Semaphore
255 for (int i
= 0; i
< num_inst
- 1; i
++) {
256 uint64_t inst
= insts
[i
];
258 static const uint32_t specials
[] = {
276 for (int j
= 0; j
< ARRAY_SIZE(specials
); j
++) {
277 if (writes_reg(inst
, specials
[j
]))
281 if (reads_reg(inst
, QPU_R_MUTEX_ACQUIRE
))
284 /* XXX: semaphore, combined color read/write? */
285 switch (QPU_GET_FIELD(inst
, QPU_SIG
)) {
286 case QPU_SIG_COLOR_LOAD
:
287 case QPU_SIG_COLOR_LOAD_END
:
288 case QPU_SIG_LOAD_TMU0
:
289 case QPU_SIG_LOAD_TMU1
:
293 assert(accesses
<= 1);