vc4: Add a bitmap of branch targets in kernel validation.
mesa.git: src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[32 + 32 + 4];
	bool live_max_clamp_regs[32 + 32 + 4];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;
};

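/* The live-clamp arrays above are indexed by a flat "live register index":
 * 0-31 for regfile A, 32-63 for regfile B, and 64-67 for accumulators
 * r0-r3 (hence the 32 + 32 + 4 sizing).  The helpers below map write and
 * read addresses into that space, returning ~0 for anything not tracked.
 */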
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

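/* Returns the live register index read by the ADD op's first (A) argument,
 * so its clamp state can be looked up, or ~0 if it isn't a register we
 * track.
 */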
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

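/* Flushes the accumulated TMU setup writes for one texture sample into the
 * validated shader's sample list and resets the per-parameter offsets, so
 * that reloc_tex() in vc4_validate.c can patch the uniform stream later.
 */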
static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

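/* Validates a write to a TMU coordinate/parameter register.  A submit (S
 * register) with no earlier parameter writes is a "direct" read: it must be
 * an ADD of a uniform (the UBO base address) and a register whose value was
 * clamped through the MAX(x, 0)/MIN(x, uniform) pattern that
 * track_live_clamps() records.
 */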
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct)
		validated_shader->uniforms_size += 4;

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

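/* Validates the write address of one of the instruction's two ops.  Writes
 * that could reach system memory (the uniforms address and the
 * general-purpose VPM DMA address) are rejected outright, TMU writes are
 * handed to check_tmu_write(), and tile buffer/VPM data writes are allowed.
 */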
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		/* XXX: We'll probably need to support this for reladdr, but
		 * it's definitely a security-related one.
		 */
		DRM_ERROR("uniforms address load unsupported\n");
		return false;

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

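/* Tracks the clamp sequence that direct TMU addressing relies on: a MAX of a
 * register against small-immediate 0 marks that register in
 * live_max_clamp_regs, and a later MIN of such a register against a uniform
 * records that uniform's offset in live_min_clamp_offsets for
 * check_tmu_write() to find.
 */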
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

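/* Each uniform read consumes four bytes of the uniform stream; count them so
 * the kernel can size and relocate the stream that goes with this shader.
 */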
static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}

/* Make sure that all branches use immediate, PC-relative targets that point
 * within the shader, and note those targets for later.
 */
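/* A worked example of the target computation below: QPU branches have three
 * delay slots, so execution resumes at ip + 4, and the 32-bit immediate is a
 * byte offset from that point.  With 8-byte instructions, a branch at ip 10
 * with branch_imm 16 lands at (10 + 4) + 16 / 8 = instruction 16.
 */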
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	bool found_shader_end = false;
	int ip;
	int shader_end_ip = 0;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			shader_end_ip = ip;
			found_shader_end = true;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (max_branch_target > shader_end_ip) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

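/* Top-level entry point: checks all branches first, then walks every
 * instruction validating its writes and reads per signal type.  Returns a
 * vc4_validated_shader_info describing the uniform stream and texture
 * samples to relocate, or NULL if the shader is rejected.
 */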
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;
	int i;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	for (i = 0; i < 8; i++)
		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
		validation_state.live_min_clamp_offsets[i] = ~0;

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(inst, validated_shader))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}