2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * Command list validator for VC4.
27 * The VC4 has no IOMMU between it and system memory. So, a user with
28 * access to execute command lists could escalate privilege by
29 * overwriting system memory (drawing to it as a framebuffer) or
30 * reading system memory it shouldn't (reading it as a texture, or
31 * uniform data, or vertex data).
33 * This validates command lists to ensure that all accesses are within
34 * the bounds of the GEM objects referenced. It explicitly whitelists
35 * packets, and looks at the offsets in any address fields to make
36 * sure they're constrained within the BOs they reference.
38 * Note that because of the validation that's happening anyway, this
39 * is where GEM relocation processing happens.
43 #include "vc4_packet.h"
45 #define VALIDATE_ARGS \
46 struct vc4_exec_info *exec, \
51 /** Return the width in pixels of a 64-byte microtile. */
64 DRM_ERROR("unknown cpp: %d\n", cpp
);
69 /** Return the height in pixels of a 64-byte microtile. */
81 DRM_ERROR("unknown cpp: %d\n", cpp
);
87 * The texture unit decides what tiling format a particular miplevel is using
88 * this function, so we lay out our miptrees accordingly.
/* Returns true if a miplevel of the given size would be laid out LT
 * (lineartile) rather than T (tiled): the texture unit switches to LT
 * once neither dimension spans more than four utiles.
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
        return (width <= 4 * utile_width(cpp) ||
                height <= 4 * utile_height(cpp));
}
98 vc4_use_bo(struct vc4_exec_info
*exec
,
100 enum vc4_bo_mode mode
,
101 struct drm_gem_cma_object
**obj
)
105 if (hindex
>= exec
->bo_count
) {
106 DRM_ERROR("BO index %d greater than BO count %d\n",
107 hindex
, exec
->bo_count
);
111 if (exec
->bo
[hindex
].mode
!= mode
) {
112 if (exec
->bo
[hindex
].mode
== VC4_MODE_UNDECIDED
) {
113 exec
->bo
[hindex
].mode
= mode
;
115 DRM_ERROR("BO index %d reused with mode %d vs %d\n",
116 hindex
, exec
->bo
[hindex
].mode
, mode
);
121 *obj
= exec
->bo
[hindex
].bo
;
126 vc4_use_handle(struct vc4_exec_info
*exec
,
127 uint32_t gem_handles_packet_index
,
128 enum vc4_bo_mode mode
,
129 struct drm_gem_cma_object
**obj
)
131 return vc4_use_bo(exec
, exec
->bo_index
[gem_handles_packet_index
],
/* Returns the size in bytes of a GL shader record, as encoded in the
 * low bits of the shader state address: bits 0-2 are the attribute
 * count (0 encodes 8), bit 3 selects the extended record layout.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
        uint32_t attribute_count = pointer_bits & 7;
        bool extended = pointer_bits & 8;

        if (attribute_count == 0)
                attribute_count = 8;

        if (extended)
                return 100 + attribute_count * 4;
        else
                return 36 + attribute_count * 8;
}
151 check_tex_size(struct vc4_exec_info
*exec
, struct drm_gem_cma_object
*fbo
,
152 uint32_t offset
, uint8_t tiling_format
,
153 uint32_t width
, uint32_t height
, uint8_t cpp
)
155 uint32_t aligned_width
, aligned_height
, stride
, size
;
156 uint32_t utile_w
= utile_width(cpp
);
157 uint32_t utile_h
= utile_height(cpp
);
159 /* The values are limited by the packet/texture parameter bitfields,
160 * so we don't need to worry as much about integer overflow.
162 BUG_ON(width
> 65535);
163 BUG_ON(height
> 65535);
165 switch (tiling_format
) {
166 case VC4_TILING_FORMAT_LINEAR
:
167 aligned_width
= roundup(width
, utile_w
);
168 aligned_height
= height
;
170 case VC4_TILING_FORMAT_T
:
171 aligned_width
= roundup(width
, utile_w
* 8);
172 aligned_height
= roundup(height
, utile_h
* 8);
174 case VC4_TILING_FORMAT_LT
:
175 aligned_width
= roundup(width
, utile_w
);
176 aligned_height
= roundup(height
, utile_h
);
179 DRM_ERROR("buffer tiling %d unsupported\n", tiling_format
);
183 stride
= aligned_width
* cpp
;
185 if (INT_MAX
/ stride
< aligned_height
) {
186 DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n",
188 aligned_width
, aligned_height
);
191 size
= stride
* aligned_height
;
193 if (size
+ offset
< size
||
194 size
+ offset
> fbo
->base
.size
) {
195 DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
197 aligned_width
, aligned_height
,
198 size
, offset
, fbo
->base
.size
);
206 validate_flush_all(VALIDATE_ARGS
)
208 if (exec
->found_increment_semaphore_packet
) {
209 DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
210 "VC4_PACKET_INCREMENT_SEMAPHORE\n");
218 validate_start_tile_binning(VALIDATE_ARGS
)
220 if (exec
->found_start_tile_binning_packet
) {
221 DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
224 exec
->found_start_tile_binning_packet
= true;
226 if (!exec
->found_tile_binning_mode_config_packet
) {
227 DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
235 validate_increment_semaphore(VALIDATE_ARGS
)
237 if (exec
->found_increment_semaphore_packet
) {
238 DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
241 exec
->found_increment_semaphore_packet
= true;
243 /* Once we've found the semaphore increment, there should be one FLUSH
244 * then the end of the command list. The FLUSH actually triggers the
245 * increment, so we only need to make sure there
252 validate_wait_on_semaphore(VALIDATE_ARGS
)
254 if (exec
->found_wait_on_semaphore_packet
) {
255 DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n");
258 exec
->found_wait_on_semaphore_packet
= true;
260 if (!exec
->found_increment_semaphore_packet
) {
261 DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without "
262 "VC4_PACKET_INCREMENT_SEMAPHORE\n");
270 validate_branch_to_sublist(VALIDATE_ARGS
)
272 struct drm_gem_cma_object
*target
;
275 if (!vc4_use_handle(exec
, 0, VC4_MODE_TILE_ALLOC
, &target
))
278 if (target
!= exec
->tile_alloc_bo
) {
279 DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n");
283 if (!exec
->found_wait_on_semaphore_packet
) {
284 DRM_ERROR("Jumping to tile alloc before binning finished.\n");
288 offset
= *(uint32_t *)(untrusted
+ 0);
289 if (offset
% exec
->tile_alloc_init_block_size
||
290 offset
/ exec
->tile_alloc_init_block_size
>=
291 exec
->bin_tiles_x
* exec
->bin_tiles_y
) {
292 DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial "
293 "tile allocation space.\n");
297 *(uint32_t *)(validated
+ 0) = target
->paddr
+ offset
;
303 * validate_loadstore_tile_buffer_general() - Validation for
304 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and
305 * VC4_PACKET_STORE_TILE_BUFFER_GENERAL.
307 * The two packets are nearly the same, except for the TLB-clearing management
308 * bits not being present for loads. Additionally, while stores are executed
309 * immediately (using the current tile coordinates), loads are queued to be
310 * executed when the tile coordinates packet occurs.
312 * Note that coordinates packets are validated to be within the declared
313 * bin_x/y, which themselves are verified to match the rendering-configuration
314 * FB width and height (which the hardware uses to clip loads and stores).
317 validate_loadstore_tile_buffer_general(VALIDATE_ARGS
)
319 uint32_t packet_b0
= *(uint8_t *)(untrusted
+ 0);
320 uint32_t packet_b1
= *(uint8_t *)(untrusted
+ 1);
321 struct drm_gem_cma_object
*fbo
;
322 uint32_t buffer_type
= packet_b0
& 0xf;
323 uint32_t untrusted_address
, offset
, cpp
;
325 switch (buffer_type
) {
326 case VC4_LOADSTORE_TILE_BUFFER_NONE
:
328 case VC4_LOADSTORE_TILE_BUFFER_COLOR
:
329 if ((packet_b1
& VC4_LOADSTORE_TILE_BUFFER_MASK
) ==
330 VC4_LOADSTORE_TILE_BUFFER_RGBA8888
) {
337 case VC4_LOADSTORE_TILE_BUFFER_Z
:
338 case VC4_LOADSTORE_TILE_BUFFER_ZS
:
343 DRM_ERROR("Load/store type %d unsupported\n", buffer_type
);
347 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &fbo
))
350 untrusted_address
= *(uint32_t *)(untrusted
+ 2);
351 offset
= untrusted_address
& ~0xf;
353 if (!check_tex_size(exec
, fbo
, offset
,
355 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK
) >>
356 VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT
),
357 exec
->fb_width
, exec
->fb_height
, cpp
)) {
361 *(uint32_t *)(validated
+ 2) = (offset
+ fbo
->paddr
+
362 (untrusted_address
& 0xf));
368 validate_indexed_prim_list(VALIDATE_ARGS
)
370 struct drm_gem_cma_object
*ib
;
371 uint32_t length
= *(uint32_t *)(untrusted
+ 1);
372 uint32_t offset
= *(uint32_t *)(untrusted
+ 5);
373 uint32_t max_index
= *(uint32_t *)(untrusted
+ 9);
374 uint32_t index_size
= (*(uint8_t *)(untrusted
+ 0) >> 4) ? 2 : 1;
375 struct vc4_shader_state
*shader_state
;
377 if (exec
->found_increment_semaphore_packet
) {
378 DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
382 /* Check overflow condition */
383 if (exec
->shader_state_count
== 0) {
384 DRM_ERROR("shader state must precede primitives\n");
387 shader_state
= &exec
->shader_state
[exec
->shader_state_count
- 1];
389 if (max_index
> shader_state
->max_index
)
390 shader_state
->max_index
= max_index
;
392 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &ib
))
395 if (offset
> ib
->base
.size
||
396 (ib
->base
.size
- offset
) / index_size
< length
) {
397 DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
398 offset
, length
, index_size
, ib
->base
.size
);
402 *(uint32_t *)(validated
+ 5) = ib
->paddr
+ offset
;
408 validate_gl_array_primitive(VALIDATE_ARGS
)
410 uint32_t length
= *(uint32_t *)(untrusted
+ 1);
411 uint32_t base_index
= *(uint32_t *)(untrusted
+ 5);
413 struct vc4_shader_state
*shader_state
;
415 if (exec
->found_increment_semaphore_packet
) {
416 DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
420 /* Check overflow condition */
421 if (exec
->shader_state_count
== 0) {
422 DRM_ERROR("shader state must precede primitives\n");
425 shader_state
= &exec
->shader_state
[exec
->shader_state_count
- 1];
427 if (length
+ base_index
< length
) {
428 DRM_ERROR("primitive vertex count overflow\n");
431 max_index
= length
+ base_index
- 1;
433 if (max_index
> shader_state
->max_index
)
434 shader_state
->max_index
= max_index
;
440 validate_gl_shader_state(VALIDATE_ARGS
)
442 uint32_t i
= exec
->shader_state_count
++;
444 if (i
>= exec
->shader_state_size
) {
445 DRM_ERROR("More requests for shader states than declared\n");
449 exec
->shader_state
[i
].packet
= VC4_PACKET_GL_SHADER_STATE
;
450 exec
->shader_state
[i
].addr
= *(uint32_t *)untrusted
;
451 exec
->shader_state
[i
].max_index
= 0;
453 if (exec
->shader_state
[i
].addr
& ~0xf) {
454 DRM_ERROR("high bits set in GL shader rec reference\n");
458 *(uint32_t *)validated
= (exec
->shader_rec_p
+
459 exec
->shader_state
[i
].addr
);
461 exec
->shader_rec_p
+=
462 roundup(gl_shader_rec_size(exec
->shader_state
[i
].addr
), 16);
468 validate_nv_shader_state(VALIDATE_ARGS
)
470 uint32_t i
= exec
->shader_state_count
++;
472 if (i
>= exec
->shader_state_size
) {
473 DRM_ERROR("More requests for shader states than declared\n");
477 exec
->shader_state
[i
].packet
= VC4_PACKET_NV_SHADER_STATE
;
478 exec
->shader_state
[i
].addr
= *(uint32_t *)untrusted
;
480 if (exec
->shader_state
[i
].addr
& 15) {
481 DRM_ERROR("NV shader state address 0x%08x misaligned\n",
482 exec
->shader_state
[i
].addr
);
486 *(uint32_t *)validated
= (exec
->shader_state
[i
].addr
+
493 validate_tile_binning_config(VALIDATE_ARGS
)
495 struct drm_gem_cma_object
*tile_allocation
;
496 struct drm_gem_cma_object
*tile_state_data_array
;
498 uint32_t tile_allocation_size
;
500 if (!vc4_use_handle(exec
, 0, VC4_MODE_TILE_ALLOC
, &tile_allocation
) ||
501 !vc4_use_handle(exec
, 1, VC4_MODE_TSDA
, &tile_state_data_array
))
504 if (exec
->found_tile_binning_mode_config_packet
) {
505 DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
508 exec
->found_tile_binning_mode_config_packet
= true;
510 exec
->bin_tiles_x
= *(uint8_t *)(untrusted
+ 12);
511 exec
->bin_tiles_y
= *(uint8_t *)(untrusted
+ 13);
512 flags
= *(uint8_t *)(untrusted
+ 14);
514 if (exec
->bin_tiles_x
== 0 ||
515 exec
->bin_tiles_y
== 0) {
516 DRM_ERROR("Tile binning config of %dx%d too small\n",
517 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
521 /* Our validation relies on the user not getting to set up their own
522 * tile state/tile allocation BO contents.
524 if (!(flags
& VC4_BIN_CONFIG_AUTO_INIT_TSDA
)) {
525 DRM_ERROR("binning config missing "
526 "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n");
530 if (flags
& (VC4_BIN_CONFIG_DB_NON_MS
|
531 VC4_BIN_CONFIG_TILE_BUFFER_64BIT
|
532 VC4_BIN_CONFIG_MS_MODE_4X
)) {
533 DRM_ERROR("unsupported bining config flags 0x%02x\n", flags
);
537 if (*(uint32_t *)(untrusted
+ 0) != 0) {
538 DRM_ERROR("tile allocation offset != 0 unsupported\n");
541 tile_allocation_size
= *(uint32_t *)(untrusted
+ 4);
542 if (tile_allocation_size
> tile_allocation
->base
.size
) {
543 DRM_ERROR("tile allocation size %d > BO size %d\n",
544 tile_allocation_size
, tile_allocation
->base
.size
);
547 *(uint32_t *)validated
= tile_allocation
->paddr
;
548 exec
->tile_alloc_bo
= tile_allocation
;
550 exec
->tile_alloc_init_block_size
= 1 << (5 + ((flags
>> 5) & 3));
551 if (exec
->bin_tiles_x
* exec
->bin_tiles_y
*
552 exec
->tile_alloc_init_block_size
> tile_allocation_size
) {
553 DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
554 exec
->bin_tiles_x
* exec
->bin_tiles_y
*
555 exec
->tile_alloc_init_block_size
,
556 tile_allocation_size
);
559 if (*(uint32_t *)(untrusted
+ 8) != 0) {
560 DRM_ERROR("TSDA offset != 0 unsupported\n");
563 if (exec
->bin_tiles_x
* exec
->bin_tiles_y
* 48 >
564 tile_state_data_array
->base
.size
) {
565 DRM_ERROR("TSDA of %db too small for %dx%d bin config\n",
566 tile_state_data_array
->base
.size
,
567 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
569 *(uint32_t *)(validated
+ 8) = tile_state_data_array
->paddr
;
575 validate_tile_rendering_mode_config(VALIDATE_ARGS
)
577 struct drm_gem_cma_object
*fbo
;
578 uint32_t flags
, offset
, cpp
;
580 if (exec
->found_tile_rendering_mode_config_packet
) {
581 DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
584 exec
->found_tile_rendering_mode_config_packet
= true;
586 if (!vc4_use_handle(exec
, 0, VC4_MODE_RENDER
, &fbo
))
589 exec
->fb_width
= *(uint16_t *)(untrusted
+ 4);
590 exec
->fb_height
= *(uint16_t *)(untrusted
+ 6);
592 flags
= *(uint16_t *)(untrusted
+ 8);
593 if ((flags
& VC4_RENDER_CONFIG_FORMAT_MASK
) ==
594 VC4_RENDER_CONFIG_FORMAT_RGBA8888
) {
600 offset
= *(uint32_t *)untrusted
;
601 if (!check_tex_size(exec
, fbo
, offset
,
603 VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK
) >>
604 VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT
),
605 exec
->fb_width
, exec
->fb_height
, cpp
)) {
609 *(uint32_t *)validated
= fbo
->paddr
+ offset
;
615 validate_tile_coordinates(VALIDATE_ARGS
)
617 uint8_t tile_x
= *(uint8_t *)(untrusted
+ 0);
618 uint8_t tile_y
= *(uint8_t *)(untrusted
+ 1);
620 if (tile_x
* 64 >= exec
->fb_width
|| tile_y
* 64 >= exec
->fb_height
) {
621 DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n",
622 tile_x
, tile_y
, exec
->fb_width
, exec
->fb_height
);
630 validate_gem_handles(VALIDATE_ARGS
)
632 memcpy(exec
->bo_index
, untrusted
, sizeof(exec
->bo_index
));
636 static const struct cmd_info
{
641 int (*func
)(struct vc4_exec_info
*exec
, void *validated
,
644 [VC4_PACKET_HALT
] = { 1, 1, 1, "halt", NULL
},
645 [VC4_PACKET_NOP
] = { 1, 1, 1, "nop", NULL
},
646 [VC4_PACKET_FLUSH
] = { 1, 1, 1, "flush", NULL
},
647 [VC4_PACKET_FLUSH_ALL
] = { 1, 0, 1, "flush all state", validate_flush_all
},
648 [VC4_PACKET_START_TILE_BINNING
] = { 1, 0, 1, "start tile binning", validate_start_tile_binning
},
649 [VC4_PACKET_INCREMENT_SEMAPHORE
] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore
},
650 [VC4_PACKET_WAIT_ON_SEMAPHORE
] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore
},
651 /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
652 * we only use it from the render CL in order to jump into the tile
655 [VC4_PACKET_BRANCH_TO_SUB_LIST
] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist
},
656 [VC4_PACKET_STORE_MS_TILE_BUFFER
] = { 0, 1, 1, "store MS resolved tile color buffer", NULL
},
657 [VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL
},
659 [VC4_PACKET_STORE_TILE_BUFFER_GENERAL
] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general
},
660 [VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general
},
662 [VC4_PACKET_GL_INDEXED_PRIMITIVE
] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list
},
664 [VC4_PACKET_GL_ARRAY_PRIMITIVE
] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive
},
666 /* This is only used by clipped primitives (packets 48 and 49), which
667 * we don't support parsing yet.
669 [VC4_PACKET_PRIMITIVE_LIST_FORMAT
] = { 1, 1, 2, "primitive list format", NULL
},
671 [VC4_PACKET_GL_SHADER_STATE
] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state
},
672 [VC4_PACKET_NV_SHADER_STATE
] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state
},
674 [VC4_PACKET_CONFIGURATION_BITS
] = { 1, 1, 4, "configuration bits", NULL
},
675 [VC4_PACKET_FLAT_SHADE_FLAGS
] = { 1, 1, 5, "flat shade flags", NULL
},
676 [VC4_PACKET_POINT_SIZE
] = { 1, 1, 5, "point size", NULL
},
677 [VC4_PACKET_LINE_WIDTH
] = { 1, 1, 5, "line width", NULL
},
678 [VC4_PACKET_RHT_X_BOUNDARY
] = { 1, 1, 3, "RHT X boundary", NULL
},
679 [VC4_PACKET_DEPTH_OFFSET
] = { 1, 1, 5, "Depth Offset", NULL
},
680 [VC4_PACKET_CLIP_WINDOW
] = { 1, 1, 9, "Clip Window", NULL
},
681 [VC4_PACKET_VIEWPORT_OFFSET
] = { 1, 1, 5, "Viewport Offset", NULL
},
682 [VC4_PACKET_CLIPPER_XY_SCALING
] = { 1, 1, 9, "Clipper XY Scaling", NULL
},
683 /* Note: The docs say this was also 105, but it was 106 in the
684 * initial userland code drop.
686 [VC4_PACKET_CLIPPER_Z_SCALING
] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL
},
688 [VC4_PACKET_TILE_BINNING_MODE_CONFIG
] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config
},
690 [VC4_PACKET_TILE_RENDERING_MODE_CONFIG
] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config
},
692 [VC4_PACKET_CLEAR_COLORS
] = { 0, 1, 14, "Clear Colors", NULL
},
694 [VC4_PACKET_TILE_COORDINATES
] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates
},
696 [VC4_PACKET_GEM_HANDLES
] = { 1, 1, 9, "GEM handles", validate_gem_handles
},
700 vc4_validate_cl(struct drm_device
*dev
,
706 struct vc4_exec_info
*exec
)
708 uint32_t dst_offset
= 0;
709 uint32_t src_offset
= 0;
711 while (src_offset
< len
) {
712 void *dst_pkt
= validated
+ dst_offset
;
713 void *src_pkt
= unvalidated
+ src_offset
;
714 u8 cmd
= *(uint8_t *)src_pkt
;
715 const struct cmd_info
*info
;
717 if (cmd
> ARRAY_SIZE(cmd_info
)) {
718 DRM_ERROR("0x%08x: packet %d out of bounds\n",
723 info
= &cmd_info
[cmd
];
725 DRM_ERROR("0x%08x: packet %d invalid\n",
731 DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
732 src_offset
, cmd
, info
->name
, info
->len
);
735 if ((is_bin
&& !info
->bin
) ||
736 (!is_bin
&& !info
->render
)) {
737 DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n",
738 src_offset
, cmd
, info
->name
,
739 is_bin
? "binner" : "render");
743 if (src_offset
+ info
->len
> len
) {
744 DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
745 "exceeds bounds (0x%08x)\n",
746 src_offset
, cmd
, info
->name
, info
->len
,
751 if (cmd
!= VC4_PACKET_GEM_HANDLES
)
752 memcpy(dst_pkt
, src_pkt
, info
->len
);
754 if (info
->func
&& info
->func(exec
,
757 DRM_ERROR("0x%08x: packet %d (%s) failed to "
759 src_offset
, cmd
, info
->name
);
763 src_offset
+= info
->len
;
764 /* GEM handle loading doesn't produce HW packets. */
765 if (cmd
!= VC4_PACKET_GEM_HANDLES
)
766 dst_offset
+= info
->len
;
768 /* When the CL hits halt, it'll stop reading anything else. */
769 if (cmd
== VC4_PACKET_HALT
)
774 exec
->ct0ea
= exec
->ct0ca
+ dst_offset
;
776 if (has_bin
&& !exec
->found_start_tile_binning_packet
) {
777 DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
781 if (!exec
->found_tile_rendering_mode_config_packet
) {
782 DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
786 /* Make sure that they actually consumed the semaphore
787 * increment from the bin CL. Otherwise a later submit would
788 * have render execute immediately.
790 if (exec
->found_wait_on_semaphore_packet
!= has_bin
) {
791 DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n",
792 exec
->found_wait_on_semaphore_packet
?
796 exec
->ct1ea
= exec
->ct1ca
+ dst_offset
;
803 reloc_tex(struct vc4_exec_info
*exec
,
804 void *uniform_data_u
,
805 struct vc4_texture_sample_info
*sample
,
806 uint32_t texture_handle_index
)
809 struct drm_gem_cma_object
*tex
;
810 uint32_t p0
= *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[0]);
811 uint32_t p1
= *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[1]);
812 uint32_t p2
= (sample
->p_offset
[2] != ~0 ?
813 *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[2]) : 0);
814 uint32_t p3
= (sample
->p_offset
[3] != ~0 ?
815 *(uint32_t *)(uniform_data_u
+ sample
->p_offset
[3]) : 0);
816 uint32_t *validated_p0
= exec
->uniforms_v
+ sample
->p_offset
[0];
817 uint32_t offset
= p0
& ~0xfff;
818 uint32_t miplevels
= (p0
& 15);
819 uint32_t width
= (p1
>> 8) & 2047;
820 uint32_t height
= (p1
>> 20) & 2047;
821 uint32_t cpp
, tiling_format
, utile_w
, utile_h
;
823 uint32_t cube_map_stride
= 0;
824 enum vc4_texture_data_type type
;
826 if (!vc4_use_bo(exec
, texture_handle_index
, VC4_MODE_RENDER
, &tex
))
829 if (sample
->is_direct
) {
830 uint32_t remaining_size
= tex
->base
.size
- p0
;
831 if (p0
> tex
->base
.size
- 4) {
832 DRM_ERROR("UBO offset greater than UBO size\n");
835 if (p1
> remaining_size
- 4) {
836 DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
839 *validated_p0
= tex
->paddr
+ p0
;
849 if ((p2
& (3 << 30)) == (1 << 30))
850 cube_map_stride
= p2
& 0x3ffff000;
851 if ((p3
& (3 << 30)) == (1 << 30)) {
852 if (cube_map_stride
) {
853 DRM_ERROR("Cube map stride set twice\n");
857 cube_map_stride
= p3
& 0x3ffff000;
859 if (!cube_map_stride
) {
860 DRM_ERROR("Cube map stride not set\n");
865 type
= ((p0
>> 4) & 15) | ((p1
>> 31) << 4);
868 case VC4_TEXTURE_TYPE_RGBA8888
:
869 case VC4_TEXTURE_TYPE_RGBX8888
:
870 case VC4_TEXTURE_TYPE_RGBA32R
:
873 case VC4_TEXTURE_TYPE_RGBA4444
:
874 case VC4_TEXTURE_TYPE_RGBA5551
:
875 case VC4_TEXTURE_TYPE_RGB565
:
876 case VC4_TEXTURE_TYPE_LUMALPHA
:
877 case VC4_TEXTURE_TYPE_S16F
:
878 case VC4_TEXTURE_TYPE_S16
:
881 case VC4_TEXTURE_TYPE_LUMINANCE
:
882 case VC4_TEXTURE_TYPE_ALPHA
:
883 case VC4_TEXTURE_TYPE_S8
:
886 case VC4_TEXTURE_TYPE_ETC1
:
887 case VC4_TEXTURE_TYPE_BW1
:
888 case VC4_TEXTURE_TYPE_A4
:
889 case VC4_TEXTURE_TYPE_A1
:
890 case VC4_TEXTURE_TYPE_RGBA64
:
891 case VC4_TEXTURE_TYPE_YUV422R
:
893 DRM_ERROR("Texture format %d unsupported\n", type
);
896 utile_w
= utile_width(cpp
);
897 utile_h
= utile_height(cpp
);
899 if (type
== VC4_TEXTURE_TYPE_RGBA32R
) {
900 tiling_format
= VC4_TILING_FORMAT_LINEAR
;
902 if (size_is_lt(width
, height
, cpp
))
903 tiling_format
= VC4_TILING_FORMAT_LT
;
905 tiling_format
= VC4_TILING_FORMAT_T
;
908 if (!check_tex_size(exec
, tex
, offset
+ cube_map_stride
* 5,
909 tiling_format
, width
, height
, cpp
)) {
913 /* The mipmap levels are stored before the base of the texture. Make
914 * sure there is actually space in the BO.
916 for (i
= 1; i
<= miplevels
; i
++) {
917 uint32_t level_width
= max(width
>> i
, 1u);
918 uint32_t level_height
= max(height
>> i
, 1u);
919 uint32_t aligned_width
, aligned_height
;
922 /* Once the levels get small enough, they drop from T to LT. */
923 if (tiling_format
== VC4_TILING_FORMAT_T
&&
924 size_is_lt(level_width
, level_height
, cpp
)) {
925 tiling_format
= VC4_TILING_FORMAT_LT
;
928 switch (tiling_format
) {
929 case VC4_TILING_FORMAT_T
:
930 aligned_width
= roundup(level_width
, utile_w
* 8);
931 aligned_height
= roundup(level_height
, utile_h
* 8);
933 case VC4_TILING_FORMAT_LT
:
934 aligned_width
= roundup(level_width
, utile_w
);
935 aligned_height
= roundup(level_height
, utile_h
);
938 aligned_width
= roundup(level_width
, utile_w
);
939 aligned_height
= level_height
;
943 level_size
= aligned_width
* cpp
* aligned_height
;
945 if (offset
< level_size
) {
946 DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
947 "overflowed buffer bounds (offset %d)\n",
948 i
, level_width
, level_height
,
949 aligned_width
, aligned_height
,
954 offset
-= level_size
;
957 *validated_p0
= tex
->paddr
+ p0
;
963 validate_shader_rec(struct drm_device
*dev
,
964 struct vc4_exec_info
*exec
,
965 struct vc4_shader_state
*state
)
967 uint32_t *src_handles
;
969 enum shader_rec_reloc_type
{
973 struct shader_rec_reloc
{
974 enum shader_rec_reloc_type type
;
977 static const struct shader_rec_reloc gl_relocs
[] = {
978 { RELOC_CODE
, 4 }, /* fs */
979 { RELOC_CODE
, 16 }, /* vs */
980 { RELOC_CODE
, 28 }, /* cs */
982 static const struct shader_rec_reloc nv_relocs
[] = {
983 { RELOC_CODE
, 4 }, /* fs */
986 const struct shader_rec_reloc
*relocs
;
987 struct drm_gem_cma_object
*bo
[ARRAY_SIZE(gl_relocs
) + 8];
988 uint32_t nr_attributes
= 0, nr_fixed_relocs
, nr_relocs
, packet_size
;
990 struct vc4_validated_shader_info
*validated_shader
= NULL
;
992 if (state
->packet
== VC4_PACKET_NV_SHADER_STATE
) {
994 nr_fixed_relocs
= ARRAY_SIZE(nv_relocs
);
999 nr_fixed_relocs
= ARRAY_SIZE(gl_relocs
);
1001 nr_attributes
= state
->addr
& 0x7;
1002 if (nr_attributes
== 0)
1004 packet_size
= gl_shader_rec_size(state
->addr
);
1006 nr_relocs
= nr_fixed_relocs
+ nr_attributes
;
1008 if (nr_relocs
* 4 > exec
->shader_rec_size
) {
1009 DRM_ERROR("overflowed shader recs reading %d handles "
1010 "from %d bytes left\n",
1011 nr_relocs
, exec
->shader_rec_size
);
1014 src_handles
= exec
->shader_rec_u
;
1015 exec
->shader_rec_u
+= nr_relocs
* 4;
1016 exec
->shader_rec_size
-= nr_relocs
* 4;
1018 if (packet_size
> exec
->shader_rec_size
) {
1019 DRM_ERROR("overflowed shader recs copying %db packet "
1020 "from %d bytes left\n",
1021 packet_size
, exec
->shader_rec_size
);
1024 pkt_u
= exec
->shader_rec_u
;
1025 pkt_v
= exec
->shader_rec_v
;
1026 memcpy(pkt_v
, pkt_u
, packet_size
);
1027 exec
->shader_rec_u
+= packet_size
;
1028 /* Shader recs have to be aligned to 16 bytes (due to the attribute
1029 * flags being in the low bytes), so round the next validated shader
1030 * rec address up. This should be safe, since we've got so many
1031 * relocations in a shader rec packet.
1033 BUG_ON(roundup(packet_size
, 16) - packet_size
> nr_relocs
* 4);
1034 exec
->shader_rec_v
+= roundup(packet_size
, 16);
1035 exec
->shader_rec_size
-= packet_size
;
1037 for (i
= 0; i
< nr_relocs
; i
++) {
1038 enum vc4_bo_mode mode
;
1040 if (i
< nr_fixed_relocs
&& relocs
[i
].type
== RELOC_CODE
)
1041 mode
= VC4_MODE_SHADER
;
1043 mode
= VC4_MODE_RENDER
;
1045 if (!vc4_use_bo(exec
, src_handles
[i
], mode
, &bo
[i
])) {
1050 for (i
= 0; i
< nr_fixed_relocs
; i
++) {
1051 uint32_t o
= relocs
[i
].offset
;
1052 uint32_t src_offset
= *(uint32_t *)(pkt_u
+ o
);
1053 uint32_t *texture_handles_u
;
1054 void *uniform_data_u
;
1057 *(uint32_t *)(pkt_v
+ o
) = bo
[i
]->paddr
+ src_offset
;
1059 switch (relocs
[i
].type
) {
1061 if (src_offset
!= 0) {
1062 DRM_ERROR("Shaders must be at offset 0 of "
1067 kfree(validated_shader
);
1068 validated_shader
= vc4_validate_shader(bo
[i
]);
1069 if (!validated_shader
)
1072 if (validated_shader
->uniforms_src_size
>
1073 exec
->uniforms_size
) {
1074 DRM_ERROR("Uniforms src buffer overflow\n");
1078 texture_handles_u
= exec
->uniforms_u
;
1079 uniform_data_u
= (texture_handles_u
+
1080 validated_shader
->num_texture_samples
);
1082 memcpy(exec
->uniforms_v
, uniform_data_u
,
1083 validated_shader
->uniforms_size
);
1086 tex
< validated_shader
->num_texture_samples
;
1088 if (!reloc_tex(exec
,
1090 &validated_shader
->texture_samples
[tex
],
1091 texture_handles_u
[tex
])) {
1096 *(uint32_t *)(pkt_v
+ o
+ 4) = exec
->uniforms_p
;
1098 exec
->uniforms_u
+= validated_shader
->uniforms_src_size
;
1099 exec
->uniforms_v
+= validated_shader
->uniforms_size
;
1100 exec
->uniforms_p
+= validated_shader
->uniforms_size
;
1109 for (i
= 0; i
< nr_attributes
; i
++) {
1110 struct drm_gem_cma_object
*vbo
= bo
[nr_fixed_relocs
+ i
];
1111 uint32_t o
= 36 + i
* 8;
1112 uint32_t offset
= *(uint32_t *)(pkt_u
+ o
+ 0);
1113 uint32_t attr_size
= *(uint8_t *)(pkt_u
+ o
+ 4) + 1;
1114 uint32_t stride
= *(uint8_t *)(pkt_u
+ o
+ 5);
1117 if (state
->addr
& 0x8)
1118 stride
|= (*(uint32_t *)(pkt_u
+ 100 + i
* 4)) & ~0xff;
1120 if (vbo
->base
.size
< offset
||
1121 vbo
->base
.size
- offset
< attr_size
) {
1122 DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
1123 offset
, attr_size
, vbo
->base
.size
);
1128 max_index
= ((vbo
->base
.size
- offset
- attr_size
) /
1130 if (state
->max_index
> max_index
) {
1131 DRM_ERROR("primitives use index %d out of supplied %d\n",
1132 state
->max_index
, max_index
);
1137 *(uint32_t *)(pkt_v
+ o
) = vbo
->paddr
+ offset
;
1140 kfree(validated_shader
);
1145 kfree(validated_shader
);
1150 vc4_validate_shader_recs(struct drm_device
*dev
,
1151 struct vc4_exec_info
*exec
)
1156 for (i
= 0; i
< exec
->shader_state_count
; i
++) {
1157 ret
= validate_shader_rec(dev
, exec
, &exec
->shader_state
[i
]);