vc4: Bring over cleanups from submitting to the kernel.
[mesa.git] src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

struct vc4_shader_validation_state {
        struct vc4_texture_sample_info tmu_setup[2];
        int tmu_write_count[2];

        /* For registers that were last written to by a MIN instruction with
         * one argument being a uniform, the address of the uniform.
         * Otherwise, ~0.
         *
         * This is used for the validation of direct address memory reads.
         */
        uint32_t live_min_clamp_offsets[32 + 32 + 4];
        bool live_max_clamp_regs[32 + 32 + 4];
};

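/* Maps a write address to an index in the live clamp tracking arrays:
 * regfile A (waddr 0-31) maps to 0-31, regfile B to 32-63, and the
 * accumulators r0-r3 to 64-67.  Returns ~0 for write addresses we don't
 * track.
 */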
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
        if (waddr < 32) {
                if (is_b)
                        return 32 + waddr;
                else
                        return waddr;
        } else if (waddr <= QPU_W_ACC3) {
                return 64 + waddr - QPU_W_ACC0;
        } else {
                return ~0;
        }
}

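/* Returns the live clamp tracking index read by the ADD op's A argument,
 * or ~0 if that argument doesn't come from a tracked register (for
 * example, when the B regfile slot holds a small immediate).
 */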
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
        uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

        if (add_a == QPU_MUX_A)
                return raddr_a;
        else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
                return 32 + raddr_b;
        else if (add_a <= QPU_MUX_R3)
                return 64 + add_a;
        else
                return ~0;
}

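/* Writing the TMU "S" coordinate register is what submits the texture
 * lookup, so it terminates a TMU parameter sequence.  The S/T/R/B write
 * addresses for both TMUs form one contiguous range, which is what the
 * range check in is_tmu_write() relies on.
 */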
static bool
is_tmu_submit(uint32_t waddr)
{
        return (waddr == QPU_W_TMU0_S ||
                waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
        return (waddr >= QPU_W_TMU0_S &&
                waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
                      struct vc4_shader_validation_state *validation_state,
                      int tmu)
{
        uint32_t s = validated_shader->num_texture_samples;
        int i;
        struct vc4_texture_sample_info *temp_samples;

        temp_samples = krealloc(validated_shader->texture_samples,
                                (s + 1) * sizeof(*temp_samples),
                                GFP_KERNEL);
        if (!temp_samples)
                return false;

        memcpy(&temp_samples[s],
               &validation_state->tmu_setup[tmu],
               sizeof(*temp_samples));

        validated_shader->num_texture_samples = s + 1;
        validated_shader->texture_samples = temp_samples;

        for (i = 0; i < 4; i++)
                validation_state->tmu_setup[tmu].p_offset[i] = ~0;

        return true;
}

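/* Checks a write to one of the TMU parameter registers (S/T/R/B).  Each
 * parameter write records where in the uniform stream its value lives so
 * that reloc_tex() in vc4_validate.c can patch it, and a write to S
 * submits the lookup and records the accumulated setup.  "Direct"
 * texture reads (an add of a clamped offset to a UBO base uniform as the
 * first TMU write) get extra checks so they can't address outside the
 * UBO.
 */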
static bool
check_tmu_write(uint64_t inst,
                struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
                bool is_mul)
{
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        int tmu = waddr > QPU_W_TMU0_B;
        bool submit = is_tmu_submit(waddr);
        bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

        if (is_direct) {
                uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
                uint32_t clamp_reg, clamp_offset;

                if (sig == QPU_SIG_SMALL_IMM) {
                        DRM_ERROR("direct TMU read used small immediate\n");
                        return false;
                }

                /* Make sure that this texture load is an add of the base
                 * address of the UBO to a clamped offset within the UBO.
                 */
                if (is_mul ||
                    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
                        DRM_ERROR("direct TMU load wasn't an add\n");
                        return false;
                }

                /* We assert that the clamped address is the first
                 * argument, and the UBO base address is the second argument.
                 * This is arbitrary, but simpler than supporting flipping the
                 * two either way.
                 */
                clamp_reg = raddr_add_a_to_live_reg_index(inst);
                if (clamp_reg == ~0) {
                        DRM_ERROR("direct TMU load wasn't clamped\n");
                        return false;
                }

                clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
                if (clamp_offset == ~0) {
                        DRM_ERROR("direct TMU load wasn't clamped\n");
                        return false;
                }

                /* Store the clamp value's offset in p1 (see reloc_tex() in
                 * vc4_validate.c).
                 */
                validation_state->tmu_setup[tmu].p_offset[1] =
                        clamp_offset;

                if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
                    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
                        DRM_ERROR("direct TMU load didn't add to a uniform\n");
                        return false;
                }

                validation_state->tmu_setup[tmu].is_direct = true;
        } else {
                if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
                                              raddr_b == QPU_R_UNIF)) {
                        DRM_ERROR("uniform read in the same instruction as "
                                  "texture setup.\n");
                        return false;
                }
        }

        if (validation_state->tmu_write_count[tmu] >= 4) {
                DRM_ERROR("TMU%d got too many parameters before dispatch\n",
                          tmu);
                return false;
        }
        validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
                validated_shader->uniforms_size;
        validation_state->tmu_write_count[tmu]++;
        /* Since direct uses a RADDR uniform reference, it will get counted in
         * check_instruction_reads()
         */
        if (!is_direct)
                validated_shader->uniforms_size += 4;

        if (submit) {
                if (!record_texture_sample(validated_shader,
                                           validation_state, tmu)) {
                        return false;
                }

                validation_state->tmu_write_count[tmu] = 0;
        }

        return true;
}

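/* Validates a single write address from either the ADD or the MUL pipe.
 * Only write targets that can't touch system memory are allowed through;
 * TMU writes get the extra checks in check_tmu_write(), and anything not
 * listed in the switch is a plain register-file or accumulator write.
 */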
static bool
check_reg_write(uint64_t inst,
                struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
                bool is_mul)
{
        uint32_t waddr = (is_mul ?
                          QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
                          QPU_GET_FIELD(inst, QPU_WADDR_ADD));

        switch (waddr) {
        case QPU_W_UNIFORMS_ADDRESS:
                /* XXX: We'll probably need to support this for reladdr, but
                 * it's definitely a security-related one.
                 */
                DRM_ERROR("uniforms address load unsupported\n");
                return false;

        case QPU_W_TLB_COLOR_MS:
        case QPU_W_TLB_COLOR_ALL:
        case QPU_W_TLB_Z:
                /* These only interact with the tile buffer, not main memory,
                 * so they're safe.
                 */
                return true;

        case QPU_W_TMU0_S:
        case QPU_W_TMU0_T:
        case QPU_W_TMU0_R:
        case QPU_W_TMU0_B:
        case QPU_W_TMU1_S:
        case QPU_W_TMU1_T:
        case QPU_W_TMU1_R:
        case QPU_W_TMU1_B:
                return check_tmu_write(inst, validated_shader, validation_state,
                                       is_mul);

        case QPU_W_HOST_INT:
        case QPU_W_TMU_NOSWAP:
        case QPU_W_TLB_ALPHA_MASK:
        case QPU_W_MUTEX_RELEASE:
                /* XXX: I haven't thought about these, so don't support them
                 * for now.
                 */
                DRM_ERROR("Unsupported waddr %d\n", waddr);
                return false;

        case QPU_W_VPM_ADDR:
                DRM_ERROR("General VPM DMA unsupported\n");
                return false;

        case QPU_W_VPM:
        case QPU_W_VPMVCD_SETUP:
                /* We allow VPM setup in general, even including VPM DMA
                 * configuration setup, because the (unsafe) DMA can only be
                 * triggered by QPU_W_VPM_ADDR writes.
                 */
                return true;

        case QPU_W_TLB_STENCIL_SETUP:
                return true;
        }

        return true;
}

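/* Tracks the clamp pattern used by direct texture reads to bound a UBO
 * offset: a MAX(x, 0) marks a register as clamped to a minimum of 0, and
 * a following MIN(that, uniform) marks it as fully clamped and records
 * which uniform provided the upper bound, so check_tmu_write() can later
 * verify the address that gets added to the UBO base.
 */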
static void
track_live_clamps(uint64_t inst,
                  struct vc4_validated_shader_info *validated_shader,
                  struct vc4_shader_validation_state *validation_state)
{
        uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
        uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
        uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
        bool ws = inst & QPU_WS;
        uint32_t lri_add_a, lri_add, lri_mul;
        bool add_a_is_min_0;

        /* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
         * before we clear previous live state.
         */
        lri_add_a = raddr_add_a_to_live_reg_index(inst);
        add_a_is_min_0 = (lri_add_a != ~0 &&
                          validation_state->live_max_clamp_regs[lri_add_a]);

        /* Clear live state for registers written by our instruction. */
        lri_add = waddr_to_live_reg_index(waddr_add, ws);
        lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
        if (lri_mul != ~0) {
                validation_state->live_max_clamp_regs[lri_mul] = false;
                validation_state->live_min_clamp_offsets[lri_mul] = ~0;
        }
        if (lri_add != ~0) {
                validation_state->live_max_clamp_regs[lri_add] = false;
                validation_state->live_min_clamp_offsets[lri_add] = ~0;
        } else {
                /* Nothing further to do for live tracking, since only ADDs
                 * generate new live clamp registers.
                 */
                return;
        }

        /* Now, handle remaining live clamp tracking for the ADD operation. */

        if (cond_add != QPU_COND_ALWAYS)
                return;

        if (op_add == QPU_A_MAX) {
                /* Track live clamps of a value to a minimum of 0 (in either
                 * arg).
                 */
                if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
                    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
                        return;
                }

                validation_state->live_max_clamp_regs[lri_add] = true;
        } else if (op_add == QPU_A_MIN) {
                /* Track live clamps of a value clamped to a minimum of 0 and
                 * a maximum of some uniform's offset.
                 */
                if (!add_a_is_min_0)
                        return;

                if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
                    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
                      sig != QPU_SIG_SMALL_IMM)) {
                        return;
                }

                validation_state->live_min_clamp_offsets[lri_add] =
                        validated_shader->uniforms_size;
        }
}

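/* Validates both write addresses of an instruction (the ADD and MUL pipes
 * each have one), then updates the live clamp tracking state.
 */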
static bool
check_instruction_writes(uint64_t inst,
                         struct vc4_validated_shader_info *validated_shader,
                         struct vc4_shader_validation_state *validation_state)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        bool ok;

        if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
                DRM_ERROR("ADD and MUL both set up textures\n");
                return false;
        }

        ok = (check_reg_write(inst, validated_shader, validation_state,
                              false) &&
              check_reg_write(inst, validated_shader, validation_state,
                              true));

        track_live_clamps(inst, validated_shader, validation_state);

        return ok;
}

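/* Each uniform read consumes 4 bytes of the uniform stream, so count them
 * here to learn how much uniform data the shader reads.
 */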
static bool
check_instruction_reads(uint64_t inst,
                        struct vc4_validated_shader_info *validated_shader)
{
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
        uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

        if (raddr_a == QPU_R_UNIF ||
            (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
                /* This can't overflow the uint32_t, because we only add 4
                 * here per 8 bytes of instruction read, so the shader BO
                 * would have failed allocation long before the counter
                 * could wrap.
                 */
                validated_shader->uniforms_size += 4;
        }

        return true;
}

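/* Validates the whole shader BO, instruction by instruction.  Returns a
 * newly allocated vc4_validated_shader_info describing the uniform and
 * texture sample usage, or NULL on failure.
 */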
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
        bool found_shader_end = false;
        int shader_end_ip = 0;
        uint32_t ip, max_ip;
        uint64_t *shader;
        struct vc4_validated_shader_info *validated_shader;
        struct vc4_shader_validation_state validation_state;
        int i;

        memset(&validation_state, 0, sizeof(validation_state));

        for (i = 0; i < 8; i++)
                validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
        for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
                validation_state.live_min_clamp_offsets[i] = ~0;

        shader = shader_obj->vaddr;
        max_ip = shader_obj->base.size / sizeof(uint64_t);

        validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
        if (!validated_shader)
                return NULL;

        for (ip = 0; ip < max_ip; ip++) {
                uint64_t inst = shader[ip];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
                case QPU_SIG_SCOREBOARD_UNLOCK:
                case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                case QPU_SIG_PROG_END:
                case QPU_SIG_SMALL_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad write at ip %d\n", ip);
                                goto fail;
                        }

                        if (!check_instruction_reads(inst, validated_shader))
                                goto fail;

                        if (sig == QPU_SIG_PROG_END) {
                                found_shader_end = true;
                                shader_end_ip = ip;
                        }

                        break;

                case QPU_SIG_LOAD_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
                                goto fail;
                        }
                        break;

                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
                                  "instruction %d\n", sig, ip);
                        goto fail;
                }

                /* There are two delay slots after program end is signaled
                 * that are still executed, then we're finished.
                 */
                if (found_shader_end && ip == shader_end_ip + 2)
                        break;
        }

        if (ip == max_ip) {
                DRM_ERROR("shader failed to terminate before "
                          "shader BO end at %zd\n",
                          shader_obj->base.size);
                goto fail;
        }

        /* Again, no chance of integer overflow here because the worst case
         * scenario is 8 bytes of uniforms plus handles per 8-byte
         * instruction.
         */
        validated_shader->uniforms_src_size =
                (validated_shader->uniforms_size +
                 4 * validated_shader->num_texture_samples);

        return validated_shader;

fail:
        if (validated_shader) {
                kfree(validated_shader->texture_samples);
                kfree(validated_shader);
        }
        return NULL;
}