2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * Command list validator for VC4.
27 * The VC4 has no IOMMU between it and system memory. So, a user with
28 * access to execute command lists could escalate privilege by
29 * overwriting system memory (drawing to it as a framebuffer) or
30 * reading system memory it shouldn't (reading it as a texture, or
31 * uniform data, or vertex data).
33 * This validates command lists to ensure that all accesses are within
34 * the bounds of the GEM objects referenced. It explicitly whitelists
35 * packets, and looks at the offsets in any address fields to make
36 * sure they're constrained within the BOs they reference.
38 * Note that because of the validation that's happening anyway, this
39 * is where GEM relocation processing happens.
43 #include "vc4_packet.h"
45 #define VALIDATE_ARGS \
46 struct exec_info *exec, \
/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
	/* NOTE(review): extraction dropped the case bodies; restored from the
	 * 64-byte microtile layout (width * height * cpp == 64).
	 */
	switch (cpp) {
	case 1:
	case 2:
		return 8;
	case 4:
		return 4;
	case 8:
		return 2;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}
/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
	/* NOTE(review): extraction dropped the case bodies; restored from the
	 * 64-byte microtile layout (width * height * cpp == 64).
	 */
	switch (cpp) {
	case 1:
		return 8;
	case 2:
	case 4:
	case 8:
		return 4;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}
/**
 * size_is_lt() - Returns whether a miplevel of the given dimensions would use
 * the LT (lineartile) tiling layout rather than T tiling.
 *
 * The texture unit decides what tiling format a particular miplevel is using
 * this function, so we lay out our miptrees accordingly.
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	return (width <= 4 * utile_width(cpp) ||
		height <= 4 * utile_height(cpp));
}
98 vc4_use_bo(struct exec_info
*exec
,
100 enum vc4_bo_mode mode
,
101 struct drm_gem_cma_object
**obj
)
105 if (hindex
>= exec
->bo_count
) {
106 DRM_ERROR("BO index %d greater than BO count %d\n",
107 hindex
, exec
->bo_count
);
111 if (exec
->bo
[hindex
].mode
!= mode
) {
112 if (exec
->bo
[hindex
].mode
== VC4_MODE_UNDECIDED
) {
113 exec
->bo
[hindex
].mode
= mode
;
115 DRM_ERROR("BO index %d reused with mode %d vs %d\n",
116 hindex
, exec
->bo
[hindex
].mode
, mode
);
121 *obj
= exec
->bo
[hindex
].bo
;
126 vc4_use_handle(struct exec_info
*exec
,
127 uint32_t gem_handles_packet_index
,
128 enum vc4_bo_mode mode
,
129 struct drm_gem_cma_object
**obj
)
131 return vc4_use_bo(exec
, exec
->bo_index
[gem_handles_packet_index
],
/**
 * gl_shader_rec_size() - Returns the size in bytes of a GL shader record,
 * derived from the low bits of the shader state packet's address word.
 *
 * Bits 0-2 encode the attribute count (0 means 8) and bit 3 selects the
 * extended record format.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t attribute_count = pointer_bits & 7;
	bool extended = pointer_bits & 8;

	/* An encoded count of 0 means the maximum of 8 attributes. */
	if (attribute_count == 0)
		attribute_count = 8;

	if (extended)
		return 100 + attribute_count * 4;
	else
		return 36 + attribute_count * 8;
}
151 check_tex_size(struct exec_info
*exec
, struct drm_gem_cma_object
*fbo
,
152 uint32_t offset
, uint8_t tiling_format
,
153 uint32_t width
, uint32_t height
, uint8_t cpp
)
155 uint32_t aligned_width
, aligned_height
, stride
, size
;
156 uint32_t utile_w
= utile_width(cpp
);
157 uint32_t utile_h
= utile_height(cpp
);
159 /* The values are limited by the packet/texture parameter bitfields,
160 * so we don't need to worry as much about integer overflow.
162 BUG_ON(width
> 65535);
163 BUG_ON(height
> 65535);
165 switch (tiling_format
) {
166 case VC4_TILING_FORMAT_LINEAR
:
167 aligned_width
= roundup(width
, 16 / cpp
);
168 aligned_height
= height
;
170 case VC4_TILING_FORMAT_T
:
171 aligned_width
= roundup(width
, utile_w
* 8);
172 aligned_height
= roundup(height
, utile_h
* 8);
174 case VC4_TILING_FORMAT_LT
:
175 aligned_width
= roundup(width
, utile_w
);
176 aligned_height
= roundup(height
, utile_h
);
179 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format
);
183 stride
= aligned_width
* cpp
;
185 if (INT_MAX
/ stride
< aligned_height
) {
186 DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n",
188 aligned_width
, aligned_height
);
191 size
= stride
* aligned_height
;
193 if (size
+ offset
< size
||
194 size
+ offset
> fbo
->base
.size
) {
195 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
197 aligned_width
, aligned_height
,
198 size
, offset
, fbo
->base
.size
);
206 validate_start_tile_binning(VALIDATE_ARGS
)
208 if (exec
->found_start_tile_binning_packet
) {
209 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
212 exec
->found_start_tile_binning_packet
= true;
214 if (!exec
->found_tile_binning_mode_config_packet
) {
215 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
223 validate_branch_to_sublist(VALIDATE_ARGS
)
225 struct drm_gem_cma_object
*target
;
228 if (!vc4_use_handle(exec
, 0, VC4_MODE_TILE_ALLOC
, &target
))
231 if (target
!= exec
->tile_alloc_bo
) {
232 DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n");
236 offset
= *(uint32_t *)(untrusted
+ 0);
237 if (offset
% exec
->tile_alloc_init_block_size
||
238 offset
/ exec
->tile_alloc_init_block_size
>
239 exec
->bin_tiles_x
* exec
->bin_tiles_y
) {
240 DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial "
241 "tile allocation space.\n");
245 *(uint32_t *)(validated
+ 0) = target
->paddr
+ offset
;
251 * validate_loadstore_tile_buffer_general() - Validation for
252 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and
253 * VC4_PACKET_STORE_TILE_BUFFER_GENERAL.
255 * The two packets are nearly the same, except for the TLB-clearing management
256 * bits not being present for loads. Additionally, while stores are executed
257 * immediately (using the current tile coordinates), loads are queued to be
258 * executed when the tile coordinates packet occurs.
260 * Note that coordinates packets are validated to be within the declared
261 * bin_x/y, which themselves are verified to match the rendering-configuration
262 * FB width and height (which the hardware uses to clip loads and stores).
265 validate_loadstore_tile_buffer_general(VALIDATE_ARGS
)
267 uint32_t packet_b0
= *(uint8_t *)(untrusted
+ 0);
268 uint32_t packet_b1
= *(uint8_t *)(untrusted
+ 1);
269 struct drm_gem_cma_object
*fbo
;
270 uint32_t buffer_type
= packet_b0
& 0xf;
271 uint32_t untrusted_address
, offset
, cpp
;
273 switch (buffer_type
) {
274 case VC4_LOADSTORE_TILE_BUFFER_NONE
:
276 case VC4_LOADSTORE_TILE_BUFFER_COLOR
:
277 if ((packet_b1
& VC4_LOADSTORE_TILE_BUFFER_MASK
) ==
278 VC4_LOADSTORE_TILE_BUFFER_RGBA8888
) {
285 case VC4_LOADSTORE_TILE_BUFFER_Z
:
286 case VC4_LOADSTORE_TILE_BUFFER_ZS
:
291 DRM_ERROR("Load/store type %d unsupported\n", buffer_type
);
295 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &fbo
))
298 untrusted_address
= *(uint32_t *)(untrusted
+ 2);
299 offset
= untrusted_address
& ~0xf;
301 if (!check_tex_size(exec
, fbo
, offset
,
303 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK
) >>
304 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
),
305 exec
->fb_width
, exec
->fb_height
, cpp
)) {
309 *(uint32_t *)(validated
+ 2) = (offset
+ fbo
->paddr
+
310 (untrusted_address
& 0xf));
316 validate_indexed_prim_list(VALIDATE_ARGS
)
318 struct drm_gem_cma_object
*ib
;
319 uint32_t length
= *(uint32_t *)(untrusted
+ 1);
320 uint32_t offset
= *(uint32_t *)(untrusted
+ 5);
321 uint32_t max_index
= *(uint32_t *)(untrusted
+ 9);
322 uint32_t index_size
= (*(uint8_t *)(untrusted
+ 0) >> 4) ? 2 : 1;
323 struct vc4_shader_state
*shader_state
;
325 /* Check overflow condition */
326 if (exec
->shader_state_count
== 0) {
327 DRM_ERROR("shader state must precede primitives\n");
330 shader_state
= &exec
->shader_state
[exec
->shader_state_count
- 1];
332 if (max_index
> shader_state
->max_index
)
333 shader_state
->max_index
= max_index
;
335 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &ib
))
338 if (offset
> ib
->base
.size
||
339 (ib
->base
.size
- offset
) / index_size
< length
) {
340 DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
341 offset
, length
, index_size
, ib
->base
.size
);
345 *(uint32_t *)(validated
+ 5) = ib
->paddr
+ offset
;
351 validate_gl_array_primitive(VALIDATE_ARGS
)
353 uint32_t length
= *(uint32_t *)(untrusted
+ 1);
354 uint32_t base_index
= *(uint32_t *)(untrusted
+ 5);
356 struct vc4_shader_state
*shader_state
;
358 /* Check overflow condition */
359 if (exec
->shader_state_count
== 0) {
360 DRM_ERROR("shader state must precede primitives\n");
363 shader_state
= &exec
->shader_state
[exec
->shader_state_count
- 1];
365 if (length
+ base_index
< length
) {
366 DRM_ERROR("primitive vertex count overflow\n");
369 max_index
= length
+ base_index
- 1;
371 if (max_index
> shader_state
->max_index
)
372 shader_state
->max_index
= max_index
;
378 validate_gl_shader_state(VALIDATE_ARGS
)
380 uint32_t i
= exec
->shader_state_count
++;
382 if (i
>= exec
->shader_state_size
) {
383 DRM_ERROR("More requests for shader states than declared\n");
387 exec
->shader_state
[i
].packet
= VC4_PACKET_GL_SHADER_STATE
;
388 exec
->shader_state
[i
].addr
= *(uint32_t *)untrusted
;
389 exec
->shader_state
[i
].max_index
= 0;
391 if (exec
->shader_state
[i
].addr
& ~0xf) {
392 DRM_ERROR("high bits set in GL shader rec reference\n");
396 *(uint32_t *)validated
= (exec
->shader_rec_p
+
397 exec
->shader_state
[i
].addr
);
399 exec
->shader_rec_p
+=
400 roundup(gl_shader_rec_size(exec
->shader_state
[i
].addr
), 16);
406 validate_nv_shader_state(VALIDATE_ARGS
)
408 uint32_t i
= exec
->shader_state_count
++;
410 if (i
>= exec
->shader_state_size
) {
411 DRM_ERROR("More requests for shader states than declared\n");
415 exec
->shader_state
[i
].packet
= VC4_PACKET_NV_SHADER_STATE
;
416 exec
->shader_state
[i
].addr
= *(uint32_t *)untrusted
;
418 if (exec
->shader_state
[i
].addr
& 15) {
419 DRM_ERROR("NV shader state address 0x%08x misaligned\n",
420 exec
->shader_state
[i
].addr
);
424 *(uint32_t *)validated
= (exec
->shader_state
[i
].addr
+
431 validate_tile_binning_config(VALIDATE_ARGS
)
433 struct drm_gem_cma_object
*tile_allocation
;
434 struct drm_gem_cma_object
*tile_state_data_array
;
436 uint32_t tile_allocation_size
;
438 if (!vc4_use_handle(exec
, 0, VC4_MODE_TILE_ALLOC
, &tile_allocation
) ||
439 !vc4_use_handle(exec
, 1, VC4_MODE_TSDA
, &tile_state_data_array
))
442 if (exec
->found_tile_binning_mode_config_packet
) {
443 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
446 exec
->found_tile_binning_mode_config_packet
= true;
448 exec
->bin_tiles_x
= *(uint8_t *)(untrusted
+ 12);
449 exec
->bin_tiles_y
= *(uint8_t *)(untrusted
+ 13);
450 flags
= *(uint8_t *)(untrusted
+ 14);
452 if (exec
->bin_tiles_x
== 0 ||
453 exec
->bin_tiles_y
== 0) {
454 DRM_ERROR("Tile binning config of %dx%d too small\n",
455 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
459 /* Our validation relies on the user not getting to set up their own
460 * tile state/tile allocation BO contents.
462 if (!(flags
& VC4_BIN_CONFIG_AUTO_INIT_TSDA
)) {
463 DRM_ERROR("binning config missing "
464 "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n");
468 if (flags
& (VC4_BIN_CONFIG_DB_NON_MS
|
469 VC4_BIN_CONFIG_TILE_BUFFER_64BIT
|
470 VC4_BIN_CONFIG_MS_MODE_4X
)) {
471 DRM_ERROR("unsupported bining config flags 0x%02x\n", flags
);
475 if (*(uint32_t *)(untrusted
+ 0) != 0) {
476 DRM_ERROR("tile allocation offset != 0 unsupported\n");
479 tile_allocation_size
= *(uint32_t *)(untrusted
+ 4);
480 if (tile_allocation_size
> tile_allocation
->base
.size
) {
481 DRM_ERROR("tile allocation size %d > BO size %d\n",
482 tile_allocation_size
, tile_allocation
->base
.size
);
485 *(uint32_t *)validated
= tile_allocation
->paddr
;
486 exec
->tile_alloc_bo
= tile_allocation
;
488 exec
->tile_alloc_init_block_size
= 1 << (5 + ((flags
>> 5) & 3));
489 if (exec
->bin_tiles_x
* exec
->bin_tiles_y
*
490 exec
->tile_alloc_init_block_size
> tile_allocation_size
) {
491 DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
492 exec
->bin_tiles_x
* exec
->bin_tiles_y
*
493 exec
->tile_alloc_init_block_size
,
494 tile_allocation_size
);
497 if (*(uint32_t *)(untrusted
+ 8) != 0) {
498 DRM_ERROR("TSDA offset != 0 unsupported\n");
501 if (exec
->bin_tiles_x
* exec
->bin_tiles_y
* 48 >
502 tile_state_data_array
->base
.size
) {
503 DRM_ERROR("TSDA of %db too small for %dx%d bin config\n",
504 tile_state_data_array
->base
.size
,
505 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
507 *(uint32_t *)(validated
+ 8) = tile_state_data_array
->paddr
;
513 validate_tile_rendering_mode_config(VALIDATE_ARGS
)
515 struct drm_gem_cma_object
*fbo
;
516 uint32_t flags
, offset
, cpp
;
518 if (exec
->found_tile_rendering_mode_config_packet
) {
519 DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
522 exec
->found_tile_rendering_mode_config_packet
= true;
524 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &fbo
))
527 exec
->fb_width
= *(uint16_t *)(untrusted
+ 4);
528 exec
->fb_height
= *(uint16_t *)(untrusted
+ 6);
530 /* Make sure that the fb width/height matches the binning config -- we
531 * rely on being able to interchange these for various assertions.
532 * (Within a tile, loads and stores will be clipped to the
533 * width/height, but we allow load/storing to any binned tile).
535 if (exec
->fb_width
<= (exec
->bin_tiles_x
- 1) * 64 ||
536 exec
->fb_width
> exec
->bin_tiles_x
* 64 ||
537 exec
->fb_height
<= (exec
->bin_tiles_y
- 1) * 64 ||
538 exec
->fb_height
> exec
->bin_tiles_y
* 64) {
539 DRM_ERROR("bin config %dx%d doesn't match FB %dx%d\n",
540 exec
->bin_tiles_x
, exec
->bin_tiles_y
,
541 exec
->fb_width
, exec
->fb_height
);
545 flags
= *(uint16_t *)(untrusted
+ 8);
546 if ((flags
& VC4_RENDER_CONFIG_FORMAT_MASK
) ==
547 VC4_RENDER_CONFIG_FORMAT_RGBA8888
) {
553 offset
= *(uint32_t *)untrusted
;
554 if (!check_tex_size(exec
, fbo
, offset
,
556 VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK
) >>
557 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT
),
558 exec
->fb_width
, exec
->fb_height
, cpp
)) {
562 *(uint32_t *)validated
= fbo
->paddr
+ offset
;
568 validate_tile_coordinates(VALIDATE_ARGS
)
570 uint8_t tile_x
= *(uint8_t *)(untrusted
+ 0);
571 uint8_t tile_y
= *(uint8_t *)(untrusted
+ 1);
573 if (tile_x
>= exec
->bin_tiles_x
||
574 tile_y
>= exec
->bin_tiles_y
) {
575 DRM_ERROR("Tile coordinates %d,%d > bin config %d,%d\n",
587 validate_gem_handles(VALIDATE_ARGS
)
589 memcpy(exec
->bo_index
, untrusted
, sizeof(exec
->bo_index
));
593 static const struct cmd_info
{
598 int (*func
)(struct exec_info
*exec
, void *validated
, void *untrusted
);
600 [VC4_PACKET_HALT
] = { 1, 1, 1, "halt", NULL
},
601 [VC4_PACKET_NOP
] = { 1, 1, 1, "nop", NULL
},
602 [VC4_PACKET_FLUSH
] = { 1, 1, 1, "flush", NULL
},
603 [VC4_PACKET_FLUSH_ALL
] = { 1, 0, 1, "flush all state", NULL
},
604 [VC4_PACKET_START_TILE_BINNING
] = { 1, 0, 1, "start tile binning", validate_start_tile_binning
},
605 [VC4_PACKET_INCREMENT_SEMAPHORE
] = { 1, 0, 1, "increment semaphore", NULL
},
606 [VC4_PACKET_WAIT_ON_SEMAPHORE
] = { 1, 1, 1, "wait on semaphore", NULL
},
607 /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
608 * we only use it from the render CL in order to jump into the tile
611 [VC4_PACKET_BRANCH_TO_SUB_LIST
] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist
},
612 [VC4_PACKET_STORE_MS_TILE_BUFFER
] = { 0, 1, 1, "store MS resolved tile color buffer", NULL
},
613 [VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL
},
615 [VC4_PACKET_STORE_TILE_BUFFER_GENERAL
] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general
},
616 [VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general
},
618 [VC4_PACKET_GL_INDEXED_PRIMITIVE
] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list
},
620 [VC4_PACKET_GL_ARRAY_PRIMITIVE
] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive
},
622 /* This is only used by clipped primitives (packets 48 and 49), which
623 * we don't support parsing yet.
625 [VC4_PACKET_PRIMITIVE_LIST_FORMAT
] = { 1, 1, 2, "primitive list format", NULL
},
627 [VC4_PACKET_GL_SHADER_STATE
] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state
},
628 [VC4_PACKET_NV_SHADER_STATE
] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state
},
630 [VC4_PACKET_CONFIGURATION_BITS
] = { 1, 1, 4, "configuration bits", NULL
},
631 [VC4_PACKET_FLAT_SHADE_FLAGS
] = { 1, 1, 5, "flat shade flags", NULL
},
632 [VC4_PACKET_POINT_SIZE
] = { 1, 1, 5, "point size", NULL
},
633 [VC4_PACKET_LINE_WIDTH
] = { 1, 1, 5, "line width", NULL
},
634 [VC4_PACKET_RHT_X_BOUNDARY
] = { 1, 1, 3, "RHT X boundary", NULL
},
635 [VC4_PACKET_DEPTH_OFFSET
] = { 1, 1, 5, "Depth Offset", NULL
},
636 [VC4_PACKET_CLIP_WINDOW
] = { 1, 1, 9, "Clip Window", NULL
},
637 [VC4_PACKET_VIEWPORT_OFFSET
] = { 1, 1, 5, "Viewport Offset", NULL
},
638 [VC4_PACKET_CLIPPER_XY_SCALING
] = { 1, 1, 9, "Clipper XY Scaling", NULL
},
639 /* Note: The docs say this was also 105, but it was 106 in the
640 * initial userland code drop.
642 [VC4_PACKET_CLIPPER_Z_SCALING
] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL
},
644 [VC4_PACKET_TILE_BINNING_MODE_CONFIG
] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config
},
646 [VC4_PACKET_TILE_RENDERING_MODE_CONFIG
] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config
},
648 [VC4_PACKET_CLEAR_COLORS
] = { 0, 1, 14, "Clear Colors", NULL
},
650 [VC4_PACKET_TILE_COORDINATES
] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates
},
652 [VC4_PACKET_GEM_HANDLES
] = { 1, 1, 9, "GEM handles", validate_gem_handles
},
656 vc4_validate_cl(struct drm_device
*dev
,
661 struct exec_info
*exec
)
663 uint32_t dst_offset
= 0;
664 uint32_t src_offset
= 0;
666 while (src_offset
< len
) {
667 void *dst_pkt
= validated
+ dst_offset
;
668 void *src_pkt
= unvalidated
+ src_offset
;
669 u8 cmd
= *(uint8_t *)src_pkt
;
670 const struct cmd_info
*info
;
672 if (cmd
> ARRAY_SIZE(cmd_info
)) {
673 DRM_ERROR("0x%08x: packet %d out of bounds\n",
678 info
= &cmd_info
[cmd
];
680 DRM_ERROR("0x%08x: packet %d invalid\n",
686 DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
687 src_offset
, cmd
, info
->name
, info
->len
);
690 if ((is_bin
&& !info
->bin
) ||
691 (!is_bin
&& !info
->render
)) {
692 DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n",
693 src_offset
, cmd
, info
->name
,
694 is_bin
? "binner" : "render");
698 if (src_offset
+ info
->len
> len
) {
699 DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
700 "exceeds bounds (0x%08x)\n",
701 src_offset
, cmd
, info
->name
, info
->len
,
706 if (cmd
!= VC4_PACKET_GEM_HANDLES
)
707 memcpy(dst_pkt
, src_pkt
, info
->len
);
709 if (info
->func
&& info
->func(exec
,
712 DRM_ERROR("0x%08x: packet %d (%s) failed to "
714 src_offset
, cmd
, info
->name
);
718 src_offset
+= info
->len
;
719 /* GEM handle loading doesn't produce HW packets. */
720 if (cmd
!= VC4_PACKET_GEM_HANDLES
)
721 dst_offset
+= info
->len
;
723 /* When the CL hits halt, it'll stop reading anything else. */
724 if (cmd
== VC4_PACKET_HALT
)
729 exec
->ct0ea
= exec
->ct0ca
+ dst_offset
;
731 if (!exec
->found_start_tile_binning_packet
) {
732 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
736 if (!exec
->found_tile_rendering_mode_config_packet
) {
737 DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
740 exec
->ct1ea
= exec
->ct1ca
+ dst_offset
;
747 reloc_tex(struct exec_info
*exec
,
748 void *uniform_data_u
,
749 struct vc4_texture_sample_info
*sample
,
750 uint32_t texture_handle_index
)
753 struct drm_gem_cma_object
*tex
;
754 uint32_t p0
= *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[0]);
755 uint32_t p1
= *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[1]);
756 uint32_t p2
= (sample
->p_offset
[2] != ~0 ?
757 *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[2]) : 0);
758 uint32_t p3
= (sample
->p_offset
[3] != ~0 ?
759 *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[3]) : 0);
760 uint32_t *validated_p0
= exec
->uniforms_v
+ sample
->p_offset
[0];
761 uint32_t offset
= p0
& ~0xfff;
762 uint32_t miplevels
= (p0
& 15);
763 uint32_t width
= (p1
>> 8) & 2047;
764 uint32_t height
= (p1
>> 20) & 2047;
765 uint32_t cpp
, tiling_format
, utile_w
, utile_h
;
767 uint32_t cube_map_stride
= 0;
768 enum vc4_texture_data_type type
;
770 if (!vc4_use_bo(exec
, texture_handle_index
, VC4_MODE_RENDER
, &tex
))
773 if (sample
->is_direct
) {
774 uint32_t remaining_size
= tex
->base
.size
- p0
;
775 if (p0
> tex
->base
.size
- 4) {
776 DRM_ERROR("UBO offset greater than UBO size\n");
779 if (p1
> remaining_size
- 4) {
780 DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
783 *validated_p0
= tex
->paddr
+ p0
;
793 if ((p2
& (3 << 30)) == (1 << 30))
794 cube_map_stride
= p2
& 0x3ffff000;
795 if ((p3
& (3 << 30)) == (1 << 30)) {
796 if (cube_map_stride
) {
797 DRM_ERROR("Cube map stride set twice\n");
801 cube_map_stride
= p3
& 0x3ffff000;
803 if (!cube_map_stride
) {
804 DRM_ERROR("Cube map stride not set\n");
809 type
= ((p0
>> 4) & 15) | ((p1
>> 31) << 4);
812 case VC4_TEXTURE_TYPE_RGBA8888
:
813 case VC4_TEXTURE_TYPE_RGBX8888
:
814 case VC4_TEXTURE_TYPE_RGBA32R
:
817 case VC4_TEXTURE_TYPE_RGBA4444
:
818 case VC4_TEXTURE_TYPE_RGBA5551
:
819 case VC4_TEXTURE_TYPE_RGB565
:
820 case VC4_TEXTURE_TYPE_LUMALPHA
:
821 case VC4_TEXTURE_TYPE_S16F
:
822 case VC4_TEXTURE_TYPE_S16
:
825 case VC4_TEXTURE_TYPE_LUMINANCE
:
826 case VC4_TEXTURE_TYPE_ALPHA
:
827 case VC4_TEXTURE_TYPE_S8
:
830 case VC4_TEXTURE_TYPE_ETC1
:
831 case VC4_TEXTURE_TYPE_BW1
:
832 case VC4_TEXTURE_TYPE_A4
:
833 case VC4_TEXTURE_TYPE_A1
:
834 case VC4_TEXTURE_TYPE_RGBA64
:
835 case VC4_TEXTURE_TYPE_YUV422R
:
837 DRM_ERROR("Texture format %d unsupported\n", type
);
840 utile_w
= utile_width(cpp
);
841 utile_h
= utile_height(cpp
);
843 if (type
== VC4_TEXTURE_TYPE_RGBA32R
) {
844 tiling_format
= VC4_TILING_FORMAT_LINEAR
;
846 if (size_is_lt(width
, height
, cpp
))
847 tiling_format
= VC4_TILING_FORMAT_LT
;
849 tiling_format
= VC4_TILING_FORMAT_T
;
852 if (!check_tex_size(exec
, tex
, offset
+ cube_map_stride
* 5,
853 tiling_format
, width
, height
, cpp
)) {
857 /* The mipmap levels are stored before the base of the texture. Make
858 * sure there is actually space in the BO.
860 for (i
= 1; i
<= miplevels
; i
++) {
861 uint32_t level_width
= max(width
>> i
, 1u);
862 uint32_t level_height
= max(height
>> i
, 1u);
863 uint32_t aligned_width
, aligned_height
;
866 /* Once the levels get small enough, they drop from T to LT. */
867 if (tiling_format
== VC4_TILING_FORMAT_T
&&
868 size_is_lt(level_width
, level_height
, cpp
)) {
869 tiling_format
= VC4_TILING_FORMAT_LT
;
872 switch (tiling_format
) {
873 case VC4_TILING_FORMAT_T
:
874 aligned_width
= roundup(level_width
, utile_w
* 8);
875 aligned_height
= roundup(level_height
, utile_h
* 8);
877 case VC4_TILING_FORMAT_LT
:
878 aligned_width
= roundup(level_width
, utile_w
);
879 aligned_height
= roundup(level_height
, utile_h
);
882 aligned_width
= roundup(level_width
, 16 / cpp
);
883 aligned_height
= level_height
;
887 level_size
= aligned_width
* cpp
* aligned_height
;
889 if (offset
< level_size
) {
890 DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
891 "overflowed buffer bounds (offset %d)\n",
892 i
, level_width
, level_height
,
893 aligned_width
, aligned_height
,
898 offset
-= level_size
;
901 *validated_p0
= tex
->paddr
+ p0
;
907 validate_shader_rec(struct drm_device
*dev
,
908 struct exec_info
*exec
,
909 struct vc4_shader_state
*state
)
911 uint32_t *src_handles
;
913 enum shader_rec_reloc_type
{
917 struct shader_rec_reloc
{
918 enum shader_rec_reloc_type type
;
921 static const struct shader_rec_reloc gl_relocs
[] = {
922 { RELOC_CODE
, 4 }, /* fs */
923 { RELOC_CODE
, 16 }, /* vs */
924 { RELOC_CODE
, 28 }, /* cs */
926 static const struct shader_rec_reloc nv_relocs
[] = {
927 { RELOC_CODE
, 4 }, /* fs */
930 const struct shader_rec_reloc
*relocs
;
931 struct drm_gem_cma_object
*bo
[ARRAY_SIZE(gl_relocs
) + 8];
932 uint32_t nr_attributes
= 0, nr_fixed_relocs
, nr_relocs
, packet_size
;
934 struct vc4_validated_shader_info
*validated_shader
= NULL
;
936 if (state
->packet
== VC4_PACKET_NV_SHADER_STATE
) {
938 nr_fixed_relocs
= ARRAY_SIZE(nv_relocs
);
943 nr_fixed_relocs
= ARRAY_SIZE(gl_relocs
);
945 nr_attributes
= state
->addr
& 0x7;
946 if (nr_attributes
== 0)
948 packet_size
= gl_shader_rec_size(state
->addr
);
950 nr_relocs
= nr_fixed_relocs
+ nr_attributes
;
952 if (nr_relocs
* 4 > exec
->shader_rec_size
) {
953 DRM_ERROR("overflowed shader recs reading %d handles "
954 "from %d bytes left\n",
955 nr_relocs
, exec
->shader_rec_size
);
958 src_handles
= exec
->shader_rec_u
;
959 exec
->shader_rec_u
+= nr_relocs
* 4;
960 exec
->shader_rec_size
-= nr_relocs
* 4;
962 if (packet_size
> exec
->shader_rec_size
) {
963 DRM_ERROR("overflowed shader recs copying %db packet "
964 "from %d bytes left\n",
965 packet_size
, exec
->shader_rec_size
);
968 pkt_u
= exec
->shader_rec_u
;
969 pkt_v
= exec
->shader_rec_v
;
970 memcpy(pkt_v
, pkt_u
, packet_size
);
971 exec
->shader_rec_u
+= packet_size
;
972 /* Shader recs have to be aligned to 16 bytes (due to the attribute
973 * flags being in the low bytes), so round the next validated shader
974 * rec address up. This should be safe, since we've got so many
975 * relocations in a shader rec packet.
977 BUG_ON(roundup(packet_size
, 16) - packet_size
> nr_relocs
* 4);
978 exec
->shader_rec_v
+= roundup(packet_size
, 16);
979 exec
->shader_rec_size
-= packet_size
;
981 for (i
= 0; i
< nr_relocs
; i
++) {
982 enum vc4_bo_mode mode
;
984 if (i
< nr_fixed_relocs
&& relocs
[i
].type
== RELOC_CODE
)
985 mode
= VC4_MODE_SHADER
;
987 mode
= VC4_MODE_RENDER
;
989 if (!vc4_use_bo(exec
, src_handles
[i
], mode
, &bo
[i
])) {
994 for (i
= 0; i
< nr_fixed_relocs
; i
++) {
995 uint32_t o
= relocs
[i
].offset
;
996 uint32_t src_offset
= *(uint32_t *)(pkt_u
+ o
);
997 uint32_t *texture_handles_u
;
998 void *uniform_data_u
;
1001 *(uint32_t *)(pkt_v
+ o
) = bo
[i
]->paddr
+ src_offset
;
1003 switch (relocs
[i
].type
) {
1005 kfree(validated_shader
);
1006 validated_shader
= vc4_validate_shader(bo
[i
],
1008 if (!validated_shader
)
1011 if (validated_shader
->uniforms_src_size
>
1012 exec
->uniforms_size
) {
1013 DRM_ERROR("Uniforms src buffer overflow\n");
1017 texture_handles_u
= exec
->uniforms_u
;
1018 uniform_data_u
= (texture_handles_u
+
1019 validated_shader
->num_texture_samples
);
1021 memcpy(exec
->uniforms_v
, uniform_data_u
,
1022 validated_shader
->uniforms_size
);
1025 tex
< validated_shader
->num_texture_samples
;
1027 if (!reloc_tex(exec
,
1029 &validated_shader
->texture_samples
[tex
],
1030 texture_handles_u
[tex
])) {
1035 *(uint32_t *)(pkt_v
+ o
+ 4) = exec
->uniforms_p
;
1037 exec
->uniforms_u
+= validated_shader
->uniforms_src_size
;
1038 exec
->uniforms_v
+= validated_shader
->uniforms_size
;
1039 exec
->uniforms_p
+= validated_shader
->uniforms_size
;
1048 for (i
= 0; i
< nr_attributes
; i
++) {
1049 struct drm_gem_cma_object
*vbo
= bo
[nr_fixed_relocs
+ i
];
1050 uint32_t o
= 36 + i
* 8;
1051 uint32_t offset
= *(uint32_t *)(pkt_u
+ o
+ 0);
1052 uint32_t attr_size
= *(uint8_t *)(pkt_u
+ o
+ 4) + 1;
1053 uint32_t stride
= *(uint8_t *)(pkt_u
+ o
+ 5);
1056 if (state
->addr
& 0x8)
1057 stride
|= (*(uint32_t *)(pkt_u
+ 100 + i
* 4)) & ~0xff;
1059 if (vbo
->base
.size
< offset
||
1060 vbo
->base
.size
- offset
< attr_size
) {
1061 DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
1062 offset
, attr_size
, vbo
->base
.size
);
1067 max_index
= ((vbo
->base
.size
- offset
- attr_size
) /
1069 if (state
->max_index
> max_index
) {
1070 DRM_ERROR("primitives use index %d out of supplied %d\n",
1071 state
->max_index
, max_index
);
1076 *(uint32_t *)(pkt_v
+ o
) = vbo
->paddr
+ offset
;
1079 kfree(validated_shader
);
1084 kfree(validated_shader
);
1089 vc4_validate_shader_recs(struct drm_device
*dev
,
1090 struct exec_info
*exec
)
1095 for (i
= 0; i
< exec
->shader_state_count
; i
++) {
1096 ret
= validate_shader_rec(dev
, exec
, &exec
->shader_state
[i
]);