vc4: Drop redundant check for is_tmu_write().
[mesa.git] src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory. So, a user with access
 * to execute shaders could escalate privilege by overwriting system memory
 * (using the VPM write address register in the general-purpose DMA mode) or
 * reading system memory it shouldn't (reading it as a texture, or uniform
 * data, or vertex data).
 *
 * This walks over a shader starting from some offset within a BO, ensuring
 * that its accesses are appropriately bounded, and recording how many texture
 * accesses are made and where so that we can do relocations for them in the
 * uniform stream.
 *
 * The kernel API has shaders stored in user-mapped BOs. The BOs will be
 * forcibly unmapped from the process before validation, and any cache of
 * validated state will be flushed if the mapping is faulted back in.
 *
 * Storing the shaders in BOs means that the validation process will be slow
 * due to uncached reads, but since shaders are long-lived and shader BOs are
 * never actually modified, this shouldn't be a problem.
 */

#include "vc4_drv.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"

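/* Per-shader state tracked while walking the instruction stream: the
 * uniform stream offsets written to each TMU's parameter registers since
 * the last texture fetch was dispatched, and how many such writes are
 * outstanding.
 */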
struct vc4_shader_validation_state {
        struct vc4_texture_sample_info tmu_setup[2];
        int tmu_write_count[2];
};

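/* Returns true if the write address targets one of the TMU0 or TMU1
 * parameter registers, which occupy a contiguous range of the QPU write
 * address map.
 */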
static bool
is_tmu_write(uint32_t waddr)
{
        return (waddr >= QPU_W_TMU0_S &&
                waddr <= QPU_W_TMU1_B);
}

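/* Flushes the parameter writes collected for a texture sample on the
 * given TMU into the validated shader's texture_samples array, padding
 * any unwritten parameter slots with ~0. Returns false on allocation
 * failure.
 */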
static bool
record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
                                struct vc4_shader_validation_state *validation_state,
                                int tmu)
{
        uint32_t s = validated_shader->num_texture_samples;
        int i;
        struct vc4_texture_sample_info *temp_samples;

        temp_samples = krealloc(validated_shader->texture_samples,
                                (s + 1) * sizeof(*temp_samples),
                                GFP_KERNEL);
        if (!temp_samples)
                return false;

        memcpy(temp_samples[s].p_offset,
               validation_state->tmu_setup[tmu].p_offset,
               validation_state->tmu_write_count[tmu] * sizeof(uint32_t));
        for (i = validation_state->tmu_write_count[tmu]; i < 4; i++)
                temp_samples[s].p_offset[i] = ~0;

        validated_shader->num_texture_samples = s + 1;
        validated_shader->texture_samples = temp_samples;

        return true;
}

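/* Validates one write to a TMU parameter register: records which uniform
 * stream offset will feed this parameter, and finalizes the sample record
 * when the S register is written, since that write triggers the texture
 * fetch.
 */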
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
                struct vc4_shader_validation_state *validation_state,
                uint32_t waddr)
{
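        /* TMU1's parameter registers immediately follow TMU0's in the
         * write address map, so any TMU waddr above TMU0_B belongs to
         * TMU1.
         */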
        int tmu = waddr > QPU_W_TMU0_B;

        if (validation_state->tmu_write_count[tmu] >= 4) {
                DRM_ERROR("TMU%d got too many parameters before dispatch\n",
                          tmu);
                return false;
        }
        validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
                validated_shader->uniforms_size;
        validation_state->tmu_write_count[tmu]++;
        validated_shader->uniforms_size += 4;

        if (waddr == QPU_W_TMU0_S || waddr == QPU_W_TMU1_S) {
                if (!record_validated_texture_sample(validated_shader,
                                                     validation_state, tmu)) {
                        return false;
                }

                validation_state->tmu_write_count[tmu] = 0;
        }

        return true;
}

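/* Checks a single write address against the registers that could cause
 * (or configure) a main memory access: TLB and plain VPM writes are
 * allowed, TMU writes get further validation, and anything that could
 * point the hardware at arbitrary memory is rejected.
 */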
static bool
check_register_write(struct vc4_validated_shader_info *validated_shader,
                     struct vc4_shader_validation_state *validation_state,
                     uint32_t waddr)
{
        switch (waddr) {
        case QPU_W_UNIFORMS_ADDRESS:
                /* XXX: We'll probably need to support this for reladdr, but
                 * it's definitely a security-related one.
                 */
                DRM_ERROR("uniforms address load unsupported\n");
                return false;

        case QPU_W_TLB_COLOR_MS:
        case QPU_W_TLB_COLOR_ALL:
        case QPU_W_TLB_Z:
                /* These only interact with the tile buffer, not main memory,
                 * so they're safe.
                 */
                return true;

        case QPU_W_TMU0_S:
        case QPU_W_TMU0_T:
        case QPU_W_TMU0_R:
        case QPU_W_TMU0_B:
        case QPU_W_TMU1_S:
        case QPU_W_TMU1_T:
        case QPU_W_TMU1_R:
        case QPU_W_TMU1_B:
                return check_tmu_write(validated_shader, validation_state,
                                       waddr);

        case QPU_W_HOST_INT:
        case QPU_W_TMU_NOSWAP:
        case QPU_W_TLB_ALPHA_MASK:
        case QPU_W_MUTEX_RELEASE:
                /* XXX: I haven't thought about these, so don't support them
                 * for now.
                 */
                DRM_ERROR("Unsupported waddr %d\n", waddr);
                return false;

        case QPU_W_VPM_ADDR:
                DRM_ERROR("General VPM DMA unsupported\n");
                return false;

        case QPU_W_VPM:
        case QPU_W_VPMVCD_SETUP:
                /* We allow VPM setup in general, even including VPM DMA
                 * configuration setup, because the (unsafe) DMA can only be
                 * triggered by QPU_W_VPM_ADDR writes.
                 */
                return true;

        case QPU_W_TLB_STENCIL_SETUP:
                return true;
        }
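
        /* Everything else in the write address map (the register file,
         * accumulators, NOP, and SFU) only affects QPU-internal state,
         * so it's safe.
         */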
        return true;
}

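/* Validates the write addresses of both the ADD and MUL pipelines of one
 * instruction. Instructions where both pipelines set up textures are
 * rejected outright.
 */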
static bool
check_instruction_writes(uint64_t inst,
                         struct vc4_validated_shader_info *validated_shader,
                         struct vc4_shader_validation_state *validation_state)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

        if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
                DRM_ERROR("ADD and MUL both set up textures\n");
                return false;
        }

        return (check_register_write(validated_shader, validation_state, waddr_add) &&
                check_register_write(validated_shader, validation_state, waddr_mul));
}

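/* Accounts for the uniform stream consumed by an instruction: each read
 * of the uniform FIFO advances the stream by 4 bytes. Uniform reads may
 * not share an instruction with TMU setup, which consumes uniform stream
 * slots of its own.
 */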
static bool
check_instruction_reads(uint64_t inst,
                        struct vc4_validated_shader_info *validated_shader)
{
        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

        if (raddr_a == QPU_R_UNIF ||
            raddr_b == QPU_R_UNIF) {
                if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
                        DRM_ERROR("uniform read in the same instruction as "
                                  "texture setup\n");
                        return false;
                }

                /* uniforms_size can't overflow the uint32_t: each 4-byte
                 * increment here corresponds to 8 bytes of instruction
                 * already read, so we'd run out of BO space long before
                 * overflowing.
                 */
                validated_shader->uniforms_size += 4;
        }

        return true;
}

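/* Validates the shader starting at start_offset within shader_obj,
 * instruction by instruction, until the two delay slots following
 * QPU_SIG_PROG_END have executed. Returns a freshly allocated
 * vc4_validated_shader_info describing the shader's uniform stream usage
 * and texture samples (the caller owns it and its texture_samples
 * array), or NULL if validation fails.
 */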
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
                    uint32_t start_offset)
{
        bool found_shader_end = false;
        int shader_end_ip = 0;
        uint32_t ip, max_ip;
        uint64_t *shader;
        struct vc4_validated_shader_info *validated_shader;
        struct vc4_shader_validation_state validation_state;

        memset(&validation_state, 0, sizeof(validation_state));

        if (start_offset + sizeof(uint64_t) > shader_obj->base.size) {
                DRM_ERROR("shader starting at %u outside of BO sized %zu\n",
                          start_offset,
                          shader_obj->base.size);
                return NULL;
        }
        shader = shader_obj->vaddr + start_offset;
        max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);

        validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
        if (!validated_shader)
                return NULL;

        for (ip = 0; ip < max_ip; ip++) {
                uint64_t inst = shader[ip];
                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

                switch (sig) {
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
                case QPU_SIG_SCOREBOARD_UNLOCK:
                case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                case QPU_SIG_PROG_END:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad write at ip %d\n", ip);
                                goto fail;
                        }

                        if (!check_instruction_reads(inst, validated_shader))
                                goto fail;

                        if (sig == QPU_SIG_PROG_END) {
                                found_shader_end = true;
                                shader_end_ip = ip;
                        }

                        break;

                case QPU_SIG_LOAD_IMM:
                        if (!check_instruction_writes(inst, validated_shader,
                                                      &validation_state)) {
                                DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
                                goto fail;
                        }
                        break;

                default:
                        DRM_ERROR("Unsupported QPU signal %d at "
                                  "instruction %d\n", sig, ip);
                        goto fail;
                }

                /* There are two delay slots after program end is signaled
                 * that are still executed, then we're finished.
                 */
                if (found_shader_end && ip == shader_end_ip + 2)
                        break;
        }

        if (ip == max_ip) {
                DRM_ERROR("shader starting at %u failed to terminate before "
                          "shader BO end at %zu\n",
                          start_offset,
                          shader_obj->base.size);
                goto fail;
        }

        /* Again, no chance of integer overflow here because the worst case
         * scenario is 8 bytes of uniforms plus handles per 8-byte
         * instruction.
         */
        validated_shader->uniforms_src_size =
                (validated_shader->uniforms_size +
                 4 * validated_shader->num_texture_samples);

        return validated_shader;

fail:
        kfree(validated_shader->texture_samples);
        kfree(validated_shader);
        return NULL;
}