vc4: Don't forget to validate code that's got PROG_END on it.
[mesa.git] src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory. So, a user with access
 * to execute shaders could escalate privilege by overwriting system memory
 * (using the VPM write address register in the general-purpose DMA mode) or
 * reading system memory it shouldn't (reading it as a texture, or uniform
 * data, or vertex data).
 *
 * This walks over a shader starting from some offset within a BO, ensuring
 * that its accesses are appropriately bounded, and recording how many texture
 * accesses are made and where so that we can do relocations for them in the
 * uniform stream.
 *
 * The kernel API has shaders stored in user-mapped BOs. The BOs will be
 * forcibly unmapped from the process before validation, and any cache of
 * validated state will be flushed if the mapping is faulted back in.
 *
 * Storing the shaders in BOs means that the validation process will be slow
 * due to uncached reads, but since shaders are long-lived and shader BOs are
 * never actually modified, this shouldn't be a problem.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

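/* Per-TMU tracking of how many parameter writes (the S, T, R, and B
 * coordinates) have been queued since the last sample was dispatched, and
 * at which offsets in the uniform stream those parameters live.
 */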
struct vc4_shader_validation_state {
        struct vc4_texture_sample_info tmu_setup[2];
        int tmu_write_count[2];
};

static bool
is_tmu_write(uint32_t waddr)
{
        return (waddr >= QPU_W_TMU0_S &&
                waddr <= QPU_W_TMU1_B);
}

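/* Appends an entry to the shader's list of validated texture samples,
 * snapshotting the uniform-stream offsets collected for this TMU's
 * parameter writes and marking unused parameter slots with ~0.
 */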
static bool
record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
                                struct vc4_shader_validation_state *validation_state,
                                int tmu)
{
        uint32_t s = validated_shader->num_texture_samples;
        int i;
        struct vc4_texture_sample_info *temp_samples;

        temp_samples = krealloc(validated_shader->texture_samples,
                                (s + 1) * sizeof(*temp_samples),
                                GFP_KERNEL);
        if (!temp_samples)
                return false;

        memcpy(temp_samples[s].p_offset,
               validation_state->tmu_setup[tmu].p_offset,
               validation_state->tmu_write_count[tmu] * sizeof(uint32_t));
        for (i = validation_state->tmu_write_count[tmu]; i < 4; i++)
                temp_samples[s].p_offset[i] = ~0;

        validated_shader->num_texture_samples = s + 1;
        validated_shader->texture_samples = temp_samples;

        return true;
}

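/* Validates a write to a TMU register and records where in the uniform
 * stream the parameter's contents will come from, so that texture
 * relocations can be applied there at submit time.
 */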
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
                uint32_t waddr)
{
        int tmu = waddr > QPU_W_TMU0_B;

        if (!is_tmu_write(waddr))
                return true;

        if (validation_state->tmu_write_count[tmu] >= 4) {
                DRM_ERROR("TMU%d got too many parameters before dispatch\n",
                          tmu);
                return false;
        }
        validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
                validated_shader->uniforms_size;
        validation_state->tmu_write_count[tmu]++;
        validated_shader->uniforms_size += 4;

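        /* Writing the S coordinate is what dispatches the texture lookup,
         * so close out this sample's parameter list and start collecting
         * the next one from zero.
         */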
        if (waddr == QPU_W_TMU0_S || waddr == QPU_W_TMU1_S) {
                if (!record_validated_texture_sample(validated_shader,
                                                     validation_state, tmu)) {
                        return false;
                }

                validation_state->tmu_write_count[tmu] = 0;
        }

        return true;
}

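/* Checks that a single register write is safe: TMU writes are validated
 * and recorded, tile-buffer accesses are allowed, and anything that could
 * address main memory directly (VPM DMA, the uniforms address) is
 * rejected.
 */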
static bool
check_register_write(struct vc4_validated_shader_info *validated_shader,
                     struct vc4_shader_validation_state *validation_state,
                     uint32_t waddr)
{
        switch (waddr) {
        case QPU_W_UNIFORMS_ADDRESS:
                /* XXX: We'll probably need to support this for reladdr, but
                 * it's definitely a security-related one.
                 */
                DRM_ERROR("uniforms address load unsupported\n");
                return false;

        case QPU_W_TLB_COLOR_MS:
        case QPU_W_TLB_COLOR_ALL:
        case QPU_W_TLB_Z:
                /* These only interact with the tile buffer, not main memory,
                 * so they're safe.
                 */
                return true;

        case QPU_W_TMU0_S:
        case QPU_W_TMU0_T:
        case QPU_W_TMU0_R:
        case QPU_W_TMU0_B:
        case QPU_W_TMU1_S:
        case QPU_W_TMU1_T:
        case QPU_W_TMU1_R:
        case QPU_W_TMU1_B:
                return check_tmu_write(validated_shader, validation_state,
                                       waddr);

        case QPU_W_HOST_INT:
        case QPU_W_TMU_NOSWAP:
        case QPU_W_TLB_ALPHA_MASK:
        case QPU_W_MUTEX_RELEASE:
                /* XXX: I haven't thought about these, so don't support them
                 * for now.
                 */
                DRM_ERROR("Unsupported waddr %d\n", waddr);
                return false;

        case QPU_W_VPM_ADDR:
                DRM_ERROR("General VPM DMA unsupported\n");
                return false;

        case QPU_W_VPM:
        case QPU_W_VPMVCD_SETUP:
                /* We allow VPM setup in general, even including VPM DMA
                 * configuration setup, because the (unsafe) DMA can only be
                 * triggered by QPU_W_VPM_ADDR writes.
                 */
                return true;

        case QPU_W_TLB_STENCIL_SETUP:
                return true;
        }

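        /* Any other waddr (register file, accumulators, NOP, SFU) only
         * affects QPU-local state, so it's safe.
         */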
        return true;
}

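/* Validates both of an instruction's write-back destinations. Setting up
 * textures from the ADD and MUL pipelines at once is rejected, since the
 * ordering of the two parameter writes in the uniform stream would be
 * ambiguous.
 */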
static bool
check_instruction_writes(uint64_t inst,
                         struct vc4_validated_shader_info *validated_shader,
                         struct vc4_shader_validation_state *validation_state)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

        if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
                DRM_ERROR("ADD and MUL both set up textures\n");
                return false;
        }

        return (check_register_write(validated_shader, validation_state, waddr_add) &&
                check_register_write(validated_shader, validation_state, waddr_mul));
}

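/* Counts uniform reads so we know how much uniform data the shader
 * consumes, and rejects uniform reads in the same instruction as texture
 * setup, since texture setup itself claims uniform-stream slots and mixing
 * the two would confuse the offset bookkeeping.
 */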
static bool
check_instruction_reads(uint64_t inst,
                        struct vc4_validated_shader_info *validated_shader)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

        if (raddr_a == QPU_R_UNIF ||
            raddr_b == QPU_R_UNIF) {
                if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
                        DRM_ERROR("uniform read in the same instruction as "
                                  "texture setup\n");
                        return false;
                }

                /* This can't overflow the uint32_t: we consume 8 bytes of
                 * instruction for every 4 bytes added here, so the BO would
                 * have to be impossibly large before this could wrap.
                 */
                validated_shader->uniforms_size += 4;
        }

        return true;
}

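/**
 * vc4_validate_shader() - walks the QPU instructions starting at
 * @start_offset within @shader_obj, validating each one until two delay
 * slots past the PROG_END signal.
 *
 * Returns a newly allocated vc4_validated_shader_info on success, or NULL
 * on failure; the caller owns the struct and its texture_samples array.
 *
 * A minimal usage sketch (hypothetical caller; the real submit path and
 * its error handling live elsewhere):
 *
 *	struct vc4_validated_shader_info *info;
 *
 *	info = vc4_validate_shader(bo, shader_offset);
 *	if (!info)
 *		return -EINVAL;
 *	... size the uniform copy from info->uniforms_src_size ...
 *	kfree(info->texture_samples);
 *	kfree(info);
 */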
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
                    uint32_t start_offset)
{
        bool found_shader_end = false;
        int shader_end_ip = 0;
        uint32_t ip, max_ip;
        uint64_t *shader;
        struct vc4_validated_shader_info *validated_shader;
        struct vc4_shader_validation_state validation_state;

        memset(&validation_state, 0, sizeof(validation_state));

        if (start_offset + sizeof(uint64_t) > shader_obj->base.size) {
                DRM_ERROR("shader starting at %u outside of BO sized %zu\n",
                          start_offset,
                          shader_obj->base.size);
                return NULL;
        }
        shader = shader_obj->vaddr + start_offset;
        max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);

        validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
        if (!validated_shader)
                return NULL;

        for (ip = 0; ip < max_ip; ip++) {
                uint64_t inst = shader[ip];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

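                /* Note that a PROG_END instruction still encodes ordinary
                 * ALU writes and reads, so it gets the same validation as
                 * any other instruction below.
                 */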
                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
                case QPU_SIG_SCOREBOARD_UNLOCK:
                case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                case QPU_SIG_PROG_END:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad write at ip %d\n", ip);
                                goto fail;
                        }

                        if (!check_instruction_reads(inst, validated_shader))
                                goto fail;

                        if (sig == QPU_SIG_PROG_END) {
                                found_shader_end = true;
                                shader_end_ip = ip;
                        }

                        break;

                case QPU_SIG_LOAD_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
                                goto fail;
                        }
                        break;

                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
                                  "instruction %d\n", sig, ip);
                        goto fail;
                }

                /* There are two delay slots after program end is signaled
                 * that are still executed, then we're finished.
                 */
                if (found_shader_end && ip == shader_end_ip + 2)
                        break;
        }

        if (ip == max_ip) {
                DRM_ERROR("shader starting at %u failed to terminate before "
                          "shader BO end at %zu\n",
                          start_offset,
                          shader_obj->base.size);
                goto fail;
        }

        /* Again, no chance of integer overflow here because the worst case
         * scenario is 8 bytes of uniforms plus handles per 8-byte
         * instruction.
         */
        validated_shader->uniforms_src_size =
                (validated_shader->uniforms_size +
                 4 * validated_shader->num_texture_samples);

        return validated_shader;

fail:
        kfree(validated_shader->texture_samples);
        kfree(validated_shader);
        return NULL;
}