2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "util/u_blitter.h"
25 #include "util/u_prim.h"
26 #include "util/u_format.h"
27 #include "util/u_pack_color.h"
28 #include "util/u_prim_restart.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
32 #include "vc5_context.h"
33 #include "vc5_resource.h"
35 #include "broadcom/cle/v3d_packet_v33_pack.h"
36 #include "broadcom/compiler/v3d_compiler.h"
39 * Does the initial binning command list setup for drawing to a given FBO.
// vc5_start_draw(): writes the opening packets of the binning command list
// (BCL) for vc5->job.
// NOTE(review): this listing is a lossy extraction. The embedded original line
// numbers skip values, so the return type, the braces and some statements are
// not visible here (for example the remaining vc5_bo_alloc() arguments for
// tsda and the value assigned to tile_state_data_array_base_address).
// Steps that are visible, in order:
//   1. make room in the BCL (size argument 256), record job->submit.bcl_start
//      and add the BCL BO to the job
//   2. allocate job->tile_alloc (1024 * 1024, named tile alloc) and a local
//      BO, tsda
//   3. emit TILE_BINNING_MODE_CONFIGURATION_PART2, then PART1
//   4. drop the local tsda reference
//   5. emit FLUSH_VCD_CACHE, START_TILE_BINNING and PRIMITIVE_LIST_FORMAT
//   6. set job->needs_flush and copy the framebuffer width and height into
//      job->draw_width and job->draw_height
42 vc5_start_draw(struct vc5_context
*vc5
)
44 struct vc5_job
*job
= vc5
->job
;
49 /* Get space to emit our BCL state, using a branch to jump to a new BO
52 vc5_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
54 job
->submit
.bcl_start
= job
->bcl
.bo
->offset
;
55 vc5_job_add_bo(job
, job
->bcl
.bo
);
57 job
->tile_alloc
= vc5_bo_alloc(vc5
->screen
, 1024 * 1024, "tile alloc");
58 struct vc5_bo
*tsda
= vc5_bo_alloc(vc5
->screen
,
64 /* "Binning mode lists start with a Tile Binning Mode Configuration
67 * Part1 signals the end of binning config setup.
69 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CONFIGURATION_PART2
, config
) {
70 config
.tile_allocation_memory_address
=
71 cl_address(job
->tile_alloc
, 0);
72 config
.tile_allocation_memory_size
= job
->tile_alloc
->size
;
75 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CONFIGURATION_PART1
, config
) {
76 config
.tile_state_data_array_base_address
=
79 config
.width_in_tiles
= job
->draw_tiles_x
;
80 config
.height_in_tiles
= job
->draw_tiles_y
;
83 config
.number_of_render_targets
= 1;
85 config
.multisample_mode_4x
= job
->msaa
;
87 config
.maximum_bpp_of_all_render_targets
= job
->internal_bpp
;
90 vc5_bo_unreference(&tsda
);
92 /* There's definitely nothing in the VCD cache we want. */
93 cl_emit(&job
->bcl
, FLUSH_VCD_CACHE
, bin
);
95 /* "Binning mode lists must have a Start Tile Binning item (6) after
96 * any prefix state data before the binning list proper starts."
98 cl_emit(&job
->bcl
, START_TILE_BINNING
, bin
);
100 cl_emit(&job
->bcl
, PRIMITIVE_LIST_FORMAT
, fmt
) {
101 fmt
.data_type
= LIST_INDEXED
;
102 fmt
.primitive_type
= LIST_TRIANGLES
;
105 job
->needs_flush
= true;
106 job
->draw_width
= vc5
->framebuffer
.width
;
107 job
->draw_height
= vc5
->framebuffer
.height
;
// vc5_predraw_check_textures(): walks stage_tex->textures[0 .. num_textures)
// and calls vc5_flush_jobs_writing_resource() on each view->texture.
// - pctx: gallium context, converted with vc5_context().
// - stage_tex: texture state of one shader stage.
// NOTE(review): the embedded numbering jumps from 117 to 121, so whatever sat
// between loading the view and the flush call (possibly a NULL-view guard) is
// not visible in this listing - confirm against the full file.
111 vc5_predraw_check_textures(struct pipe_context
*pctx
,
112 struct vc5_texture_stateobj
*stage_tex
)
114 struct vc5_context
*vc5
= vc5_context(pctx
);
116 for (int i
= 0; i
< stage_tex
->num_textures
; i
++) {
117 struct pipe_sampler_view
*view
= stage_tex
->textures
[i
];
121 vc5_flush_jobs_writing_resource(vc5
, view
->texture
);
// vc5_get_default_values(): writes one (0.0, 0.0, 0.0, 1.0) float tuple per
// vertex element into job->indirect and returns a vc5_cl_reloc for the start
// of that data.
// - vc5: context; vc5->job and vc5->vtx are read.
// - returns: reloc built from job->indirect.bo and the indirect offset sampled
//   after vc5_cl_ensure_space() and before the values are written.
// vc5_bo_reference() is called on the returned BO here, so a caller is
// expected to unreference it when done.
// NOTE(review): braces are missing from this lossy listing; the four
// cl_aligned_f() calls are read as the body of the for loop.
125 static struct vc5_cl_reloc
126 vc5_get_default_values(struct vc5_context
*vc5
)
128 struct vc5_job
*job
= vc5
->job
;
130 /* VC5_DIRTY_VTXSTATE */
131 struct vc5_vertex_stateobj
*vtx
= vc5
->vtx
;
133 /* Set up the default values for attributes. */
134 vc5_cl_ensure_space(&job
->indirect
, 4 * 4 * vtx
->num_elements
, 4);
135 struct vc5_cl_reloc default_values
=
136 cl_address(job
->indirect
.bo
, cl_offset(&job
->indirect
));
137 vc5_bo_reference(default_values
.bo
);
139 struct vc5_cl_out
*defaults
= cl_start(&job
->indirect
);
140 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
141 cl_aligned_f(&defaults
, 0.0);
142 cl_aligned_f(&defaults
, 0.0);
143 cl_aligned_f(&defaults
, 0.0);
144 cl_aligned_f(&defaults
, 1.0);
146 cl_end(&job
->indirect
, defaults
);
148 return default_values
;
// vc5_emit_gl_shader_state(): builds a GL_SHADER_STATE_RECORD plus one
// GL_SHADER_STATE_ATTRIBUTE_RECORD per vertex element in job->indirect, then
// emits a GL_SHADER_STATE packet in job->bcl that points at the record.
// - vc5: context supplying the job, vertex state, vertex buffers, compiled
//   programs (prog.cs, prog.vs, prog.fs), constant buffers and rasterizer.
// - info: draw parameters; only info->mode is read in the visible code.
// The uniform streams and the default attribute values are produced first;
// their BO references are dropped at the end, and job->shader_rec_count is
// incremented.
// NOTE(review): lossy listing - braces, break statements, the size checks
// that pick the attribute type and the trailing arguments of several calls
// are not visible (the embedded original line numbers skip values).
152 vc5_emit_gl_shader_state(struct vc5_context
*vc5
,
153 const struct pipe_draw_info
*info
)
155 struct vc5_job
*job
= vc5
->job
;
156 /* VC5_DIRTY_VTXSTATE */
157 struct vc5_vertex_stateobj
*vtx
= vc5
->vtx
;
158 /* VC5_DIRTY_VTXBUF */
159 struct vc5_vertexbuf_stateobj
*vertexbuf
= &vc5
->vertexbuf
;
161 /* Upload the uniforms to the indirect CL first */
162 struct vc5_cl_reloc fs_uniforms
=
163 vc5_write_uniforms(vc5
, vc5
->prog
.fs
,
164 &vc5
->constbuf
[PIPE_SHADER_FRAGMENT
],
166 struct vc5_cl_reloc vs_uniforms
=
167 vc5_write_uniforms(vc5
, vc5
->prog
.vs
,
168 &vc5
->constbuf
[PIPE_SHADER_VERTEX
],
// The coordinate shader (prog.cs) is fed from the PIPE_SHADER_VERTEX
// constant buffer, the same one used for prog.vs above.
170 struct vc5_cl_reloc cs_uniforms
=
171 vc5_write_uniforms(vc5
, vc5
->prog
.cs
,
172 &vc5
->constbuf
[PIPE_SHADER_VERTEX
],
174 struct vc5_cl_reloc default_values
= vc5_get_default_values(vc5
);
176 uint32_t shader_rec_offset
=
177 vc5_cl_ensure_space(&job
->indirect
,
178 cl_packet_length(GL_SHADER_STATE_RECORD
) +
180 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD
),
183 cl_emit(&job
->indirect
, GL_SHADER_STATE_RECORD
, shader
) {
184 shader
.enable_clipping
= true;
185 /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
186 shader
.point_size_in_shaded_vertex_data
=
187 (info
->mode
== PIPE_PRIM_POINTS
&&
188 vc5
->rasterizer
->base
.point_size_per_vertex
);
190 /* Must be set if the shader modifies Z, discards, or modifies
191 * the sample mask. For any of these cases, the fragment
192 * shader needs to write the Z value (even just discards).
194 shader
.fragment_shader_does_z_writes
=
195 (vc5
->prog
.fs
->prog_data
.fs
->writes_z
||
196 vc5
->prog
.fs
->prog_data
.fs
->discard
);
198 shader
.number_of_varyings_in_fragment_shader
=
199 vc5
->prog
.fs
->prog_data
.base
->num_inputs
;
201 shader
.propagate_nans
= true;
203 shader
.coordinate_shader_code_address
=
204 cl_address(vc5
->prog
.cs
->bo
, 0);
205 shader
.vertex_shader_code_address
=
206 cl_address(vc5
->prog
.vs
->bo
, 0);
207 shader
.fragment_shader_code_address
=
208 cl_address(vc5
->prog
.fs
->bo
, 0);
210 /* XXX: Use combined input/output size flag in the common
213 shader
.coordinate_shader_has_separate_input_and_output_vpm_blocks
= true;
214 shader
.vertex_shader_has_separate_input_and_output_vpm_blocks
= true;
215 shader
.coordinate_shader_input_vpm_segment_size
=
216 vc5
->prog
.cs
->prog_data
.vs
->vpm_input_size
;
217 shader
.vertex_shader_input_vpm_segment_size
=
218 vc5
->prog
.vs
->prog_data
.vs
->vpm_input_size
;
220 shader
.coordinate_shader_output_vpm_segment_size
=
221 vc5
->prog
.cs
->prog_data
.vs
->vpm_output_size
;
222 shader
.vertex_shader_output_vpm_segment_size
=
223 vc5
->prog
.vs
->prog_data
.vs
->vpm_output_size
;
225 shader
.coordinate_shader_uniforms_address
= cs_uniforms
;
226 shader
.vertex_shader_uniforms_address
= vs_uniforms
;
227 shader
.fragment_shader_uniforms_address
= fs_uniforms
;
229 shader
.vertex_id_read_by_coordinate_shader
=
230 vc5
->prog
.cs
->prog_data
.vs
->uses_vid
;
231 shader
.instance_id_read_by_coordinate_shader
=
232 vc5
->prog
.cs
->prog_data
.vs
->uses_iid
;
233 shader
.vertex_id_read_by_vertex_shader
=
234 vc5
->prog
.vs
->prog_data
.vs
->uses_vid
;
235 shader
.instance_id_read_by_vertex_shader
=
236 vc5
->prog
.vs
->prog_data
.vs
->uses_iid
;
238 shader
.address_of_default_attribute_values
= default_values
;
241 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
242 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
243 struct pipe_vertex_buffer
*vb
=
244 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
245 struct vc5_resource
*rsc
= vc5_resource(vb
->buffer
.resource
);
246 const struct util_format_description
*desc
=
247 util_format_description(elem
->src_format
);
249 uint32_t offset
= vb
->buffer_offset
+ elem
->src_offset
;
251 cl_emit(&job
->indirect
, GL_SHADER_STATE_ATTRIBUTE_RECORD
, attr
) {
252 uint32_t r_size
= desc
->channel
[0].size
;
254 /* vec_size == 0 means 4 */
255 attr
.vec_size
= desc
->nr_channels
& 3;
257 switch (desc
->channel
[0].type
) {
258 case UTIL_FORMAT_TYPE_FLOAT
:
260 attr
.type
= ATTRIBUTE_FLOAT
;
262 assert(r_size
== 16);
263 attr
.type
= ATTRIBUTE_HALF_FLOAT
;
267 case UTIL_FORMAT_TYPE_SIGNED
:
268 case UTIL_FORMAT_TYPE_UNSIGNED
:
271 attr
.type
= ATTRIBUTE_INT
;
274 attr
.type
= ATTRIBUTE_SHORT
;
277 attr
.type
= ATTRIBUTE_INT2_10_10_10
;
280 attr
.type
= ATTRIBUTE_BYTE
;
284 "format %s unsupported\n",
286 attr
.type
= ATTRIBUTE_BYTE
;
293 "format %s unsupported\n",
298 attr
.signed_int_type
=
299 desc
->channel
[0].type
== UTIL_FORMAT_TYPE_SIGNED
;
301 attr
.normalized_int_type
= desc
->channel
[0].normalized
;
302 attr
.read_as_int_uint
= desc
->channel
[0].pure_integer
;
303 attr
.address
= cl_address(rsc
->bo
, offset
);
304 attr
.stride
= vb
->stride
;
305 attr
.instance_divisor
= elem
->instance_divisor
;
306 attr
.number_of_values_read_by_coordinate_shader
=
307 vc5
->prog
.cs
->prog_data
.vs
->vattr_sizes
[i
];
308 attr
.number_of_values_read_by_vertex_shader
=
309 vc5
->prog
.vs
->prog_data
.vs
->vattr_sizes
[i
];
313 cl_emit(&job
->bcl
, GL_SHADER_STATE
, state
) {
314 state
.address
= cl_address(job
->indirect
.bo
, shader_rec_offset
);
315 state
.number_of_attribute_arrays
= vtx
->num_elements
;
318 vc5_bo_unreference(&cs_uniforms
.bo
);
319 vc5_bo_unreference(&vs_uniforms
.bo
);
320 vc5_bo_unreference(&fs_uniforms
.bo
);
321 vc5_bo_unreference(&default_values
.bo
);
323 job
->shader_rec_count
++;
// vc5_draw_vbo(): draw entry point; vc5_draw_init() installs it as
// pctx->draw_vbo.
// - pctx: gallium context, converted with vc5_context().
// - info: draw description (mode, count, start, index data, instancing,
//   primitive restart, index_bias, start_instance).
// Visible flow:
//   1. a u_trim_pipe_prim() check for draws that are not stream-output
//      counted, indirect or using primitive restart
//   2. fallbacks: util_draw_vbo_without_prim_restart() when restart_index
//      differs from mask, and util_primconvert_draw_vbo() for modes at or
//      above PIPE_PRIM_QUADS
//   3. vc5_predraw_check_textures() for vertex and fragment textures
//   4. fetch the job, make room in the BCL, refresh prim_mode and compiled
//      shaders, vc5_emit_state(), and re-emit the shader state record when
//      any of the listed dirty bits is set
//   5. optional BASE_VERTEX_BASE_INSTANCE packet
//   6. one of four primitive packets, chosen by info->index_size and
//      info->instance_count
//   7. bookkeeping: draw_calls_queued, depth and stencil resolve bits, and
//      adding colour buffer BOs to the job
// NOTE(review): lossy listing - the return type, braces, return statements,
// else keywords and several argument lists are not visible, so the control
// flow above is reconstructed from the surviving lines only.
327 vc5_draw_vbo(struct pipe_context
*pctx
, const struct pipe_draw_info
*info
)
329 struct vc5_context
*vc5
= vc5_context(pctx
);
331 if (!info
->count_from_stream_output
&& !info
->indirect
&&
332 !info
->primitive_restart
&&
333 !u_trim_pipe_prim(info
->mode
, (unsigned*)&info
->count
))
336 /* Fall back for weird desktop GL primitive restart values. */
337 if (info
->primitive_restart
&&
341 switch (info
->index_size
) {
350 if (info
->restart_index
!= mask
) {
351 util_draw_vbo_without_prim_restart(pctx
, info
);
356 if (info
->mode
>= PIPE_PRIM_QUADS
) {
357 util_primconvert_save_rasterizer_state(vc5
->primconvert
, &vc5
->rasterizer
->base
);
358 util_primconvert_draw_vbo(vc5
->primconvert
, info
);
359 perf_debug("Fallback conversion for %d %s vertices\n",
360 info
->count
, u_prim_name(info
->mode
));
364 /* Before setting up the draw, flush anything writing to the textures
367 vc5_predraw_check_textures(pctx
, &vc5
->verttex
);
368 vc5_predraw_check_textures(pctx
, &vc5
->fragtex
);
370 struct vc5_job
*job
= vc5_get_job_for_fbo(vc5
);
372 /* Get space to emit our draw call into the BCL, using a branch to
373 * jump to a new BO if necessary.
375 vc5_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
377 if (vc5
->prim_mode
!= info
->mode
) {
378 vc5
->prim_mode
= info
->mode
;
379 vc5
->dirty
|= VC5_DIRTY_PRIM_MODE
;
383 vc5_update_compiled_shaders(vc5
, info
->mode
);
385 vc5_emit_state(pctx
);
387 if (vc5
->dirty
& (VC5_DIRTY_VTXBUF
|
389 VC5_DIRTY_PRIM_MODE
|
390 VC5_DIRTY_RASTERIZER
|
391 VC5_DIRTY_COMPILED_CS
|
392 VC5_DIRTY_COMPILED_VS
|
393 VC5_DIRTY_COMPILED_FS
|
394 vc5
->prog
.cs
->uniform_dirty_bits
|
395 vc5
->prog
.vs
->uniform_dirty_bits
|
396 vc5
->prog
.fs
->uniform_dirty_bits
)) {
397 vc5_emit_gl_shader_state(vc5
, info
);
402 /* The Base Vertex/Base Instance packet sets those values to nonzero
403 * for the next draw call only.
405 if (info
->index_bias
|| info
->start_instance
) {
406 cl_emit(&job
->bcl
, BASE_VERTEX_BASE_INSTANCE
, base
) {
407 base
.base_instance
= info
->start_instance
;
408 base
.base_vertex
= info
->index_bias
;
412 /* The HW only processes transform feedback on primitives with the
415 uint32_t prim_tf_enable
= 0;
416 if (vc5
->prog
.bind_vs
->num_tf_outputs
)
417 prim_tf_enable
= (V3D_PRIM_POINTS_TF
- V3D_PRIM_POINTS
);
419 /* Note that the primitive type fields match with OpenGL/gallium
420 * definitions, up to but not including QUADS.
422 if (info
->index_size
) {
423 uint32_t index_size
= info
->index_size
;
424 uint32_t offset
= info
->start
* index_size
;
425 struct pipe_resource
*prsc
;
426 if (info
->has_user_indices
) {
428 u_upload_data(vc5
->uploader
, 0,
429 info
->count
* info
->index_size
, 4,
433 prsc
= info
->index
.resource
;
435 struct vc5_resource
*rsc
= vc5_resource(prsc
);
437 if (info
->instance_count
> 1) {
438 cl_emit(&job
->bcl
, INDEXED_INSTANCED_PRIMITIVE_LIST
, prim
) {
439 prim
.index_type
= ffs(info
->index_size
) - 1;
440 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
441 prim
.address_of_indices_list
=
442 cl_address(rsc
->bo
, offset
);
443 prim
.mode
= info
->mode
| prim_tf_enable
;
444 prim
.enable_primitive_restarts
= info
->primitive_restart
;
446 prim
.number_of_instances
= info
->instance_count
;
447 prim
.instance_length
= info
->count
;
450 cl_emit(&job
->bcl
, INDEXED_PRIMITIVE_LIST
, prim
) {
451 prim
.index_type
= ffs(info
->index_size
) - 1;
452 prim
.length
= info
->count
;
453 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
454 prim
.address_of_indices_list
=
455 cl_address(rsc
->bo
, offset
);
456 prim
.mode
= info
->mode
| prim_tf_enable
;
457 prim
.enable_primitive_restarts
= info
->primitive_restart
;
461 job
->draw_calls_queued
++;
463 if (info
->has_user_indices
)
464 pipe_resource_reference(&prsc
, NULL
);
466 if (info
->instance_count
> 1) {
467 cl_emit(&job
->bcl
, VERTEX_ARRAY_INSTANCED_PRIMITIVES
, prim
) {
468 prim
.mode
= info
->mode
| prim_tf_enable
;
469 prim
.index_of_first_vertex
= info
->start
;
470 prim
.number_of_instances
= info
->instance_count
;
471 prim
.instance_length
= info
->count
;
474 cl_emit(&job
->bcl
, VERTEX_ARRAY_PRIMITIVES
, prim
) {
475 prim
.mode
= info
->mode
| prim_tf_enable
;
476 prim
.length
= info
->count
;
477 prim
.index_of_first_vertex
= info
->start
;
481 job
->draw_calls_queued
++;
483 if (vc5
->zsa
&& job
->zsbuf
&&
484 (vc5
->zsa
->base
.depth
.enabled
||
485 vc5
->zsa
->base
.stencil
[0].enabled
)) {
486 struct vc5_resource
*rsc
= vc5_resource(job
->zsbuf
->texture
);
487 vc5_job_add_bo(job
, rsc
->bo
);
489 if (vc5
->zsa
->base
.depth
.enabled
) {
490 job
->resolve
|= PIPE_CLEAR_DEPTH
;
// NOTE(review): the plain assignment below overwrites whatever bits were
// already set in rsc->initialized_buffers, while the stencil branch further
// down uses |=. Confirm whether |= was intended here as well.
491 rsc
->initialized_buffers
= PIPE_CLEAR_DEPTH
;
493 if (vc5
->zsa
->early_z_enable
)
494 job
->uses_early_z
= true;
497 if (vc5
->zsa
->base
.stencil
[0].enabled
) {
498 job
->resolve
|= PIPE_CLEAR_STENCIL
;
499 rsc
->initialized_buffers
|= PIPE_CLEAR_STENCIL
;
503 for (int i
= 0; i
< VC5_MAX_DRAW_BUFFERS
; i
++) {
504 uint32_t bit
= PIPE_CLEAR_COLOR0
<< i
;
506 if (job
->resolve
& bit
|| !job
->cbufs
[i
])
508 struct vc5_resource
*rsc
= vc5_resource(job
->cbufs
[i
]->texture
);
511 vc5_job_add_bo(job
, rsc
->bo
);
514 if (V3D_DEBUG
& V3D_DEBUG_ALWAYS_FLUSH
)
// pack_rgba(): packs the float colour in rgba for the given format with
// util_pack_color().
// NOTE(review): lossy listing - the return type, the declaration of uc and
// both return statements are missing. Only the blocksize == 2 test survives,
// so the 2-byte case is presumably returned differently - confirm against the
// full file.
519 pack_rgba(enum pipe_format format
, const float *rgba
)
522 util_pack_color(rgba
, format
, &uc
);
523 if (util_format_get_blocksize(format
) == 2)
// vc5_clear(): records a clear on the job of the current framebuffer;
// vc5_draw_init() installs it as pctx->clear.
// - pctx: gallium context, converted with vc5_context().
// - buffers: mask tested against PIPE_CLEAR_COLOR0, PIPE_CLEAR_DEPTH and
//   PIPE_CLEAR_STENCIL; it is also ORed into job->cleared and job->resolve.
// - color: clear colour; the pack_rgba() argument lines that presumably pass
//   it are not visible in this listing.
// - depth, stencil: stored in job->clear_z and job->clear_s when the matching
//   bit is set in buffers.
// If the job already has draw calls queued it is submitted first and a new
// job is fetched, because new clears cannot be flagged after draws.
// The packed colour is written to job->clear_color[0] and [1], and the
// framebuffer width and height are copied to job->draw_max_x and draw_max_y.
// NOTE(review): lossy listing - the return type, braces, else keywords and the
// guard around the depth and stencil section are not visible.
530 vc5_clear(struct pipe_context
*pctx
, unsigned buffers
,
531 const union pipe_color_union
*color
, double depth
, unsigned stencil
)
533 struct vc5_context
*vc5
= vc5_context(pctx
);
534 struct vc5_job
*job
= vc5_get_job_for_fbo(vc5
);
536 /* We can't flag new buffers for clearing once we've queued draws. We
537 * could avoid this by using the 3d engine to clear.
539 if (job
->draw_calls_queued
) {
540 perf_debug("Flushing rendering to process new clear.\n");
541 vc5_job_submit(vc5
, job
);
542 job
= vc5_get_job_for_fbo(vc5
);
545 if (buffers
& PIPE_CLEAR_COLOR0
) {
546 struct vc5_resource
*rsc
=
547 vc5_resource(vc5
->framebuffer
.cbufs
[0]->texture
);
548 uint32_t clear_color
;
551 if (vc5_rt_format_is_565(vc5
->framebuffer
.cbufs
[0]->format
)) {
552 /* In 565 mode, the hardware will be packing our color
555 clear_color
= pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM
,
558 /* Otherwise, we need to do this packing because we
559 * support multiple swizzlings of RGBA8888.
562 pack_rgba(vc5
->framebuffer
.cbufs
[0]->format
,
566 clear_color
= pack_rgba(vc5
->framebuffer
.cbufs
[0]->format
,
569 job
->clear_color
[0] = job
->clear_color
[1] = clear_color
;
570 rsc
->initialized_buffers
|= (buffers
& PIPE_CLEAR_COLOR0
);
573 unsigned zsclear
= buffers
& PIPE_CLEAR_DEPTHSTENCIL
;
575 struct vc5_resource
*rsc
=
576 vc5_resource(vc5
->framebuffer
.zsbuf
->texture
);
578 if (zsclear
& PIPE_CLEAR_DEPTH
)
579 job
->clear_z
= depth
;
580 if (zsclear
& PIPE_CLEAR_STENCIL
)
581 job
->clear_s
= stencil
;
583 rsc
->initialized_buffers
|= zsclear
;
588 job
->draw_max_x
= vc5
->framebuffer
.width
;
589 job
->draw_max_y
= vc5
->framebuffer
.height
;
590 job
->cleared
|= buffers
;
591 job
->resolve
|= buffers
;
// vc5_clear_render_target(): unimplemented stub. The only visible statement
// prints the message unimpl: clear RT to stderr; none of the parameters is
// used in the visible code. vc5_draw_init() installs this function as
// pctx->clear_render_target.
// NOTE(review): the return type line and braces are missing from this lossy
// listing.
597 vc5_clear_render_target(struct pipe_context
*pctx
, struct pipe_surface
*ps
,
598 const union pipe_color_union
*color
,
599 unsigned x
, unsigned y
, unsigned w
, unsigned h
,
600 bool render_condition_enabled
)
602 fprintf(stderr
, "unimpl: clear RT\n");
// vc5_clear_depth_stencil(): unimplemented stub. The only visible statement
// prints the message unimpl: clear DS to stderr; none of the parameters is
// used in the visible code. vc5_draw_init() installs this function as
// pctx->clear_depth_stencil.
// NOTE(review): the return type line and braces are missing from this lossy
// listing.
606 vc5_clear_depth_stencil(struct pipe_context
*pctx
, struct pipe_surface
*ps
,
607 unsigned buffers
, double depth
, unsigned stencil
,
608 unsigned x
, unsigned y
, unsigned w
, unsigned h
,
609 bool render_condition_enabled
)
611 fprintf(stderr
, "unimpl: clear DS\n");
// vc5_draw_init(): installs the draw and clear entry points defined in this
// file on pctx: draw_vbo, clear, clear_render_target and clear_depth_stencil.
// - pctx: gallium context whose function pointers are filled in.
// NOTE(review): the return type line and braces are missing from this lossy
// listing.
615 vc5_draw_init(struct pipe_context
*pctx
)
617 pctx
->draw_vbo
= vc5_draw_vbo
;
618 pctx
->clear
= vc5_clear
;
619 pctx
->clear_render_target
= vc5_clear_render_target
;
620 pctx
->clear_depth_stencil
= vc5_clear_depth_stencil
;