2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "util/u_blitter.h"
25 #include "util/u_prim.h"
26 #include "util/u_format.h"
27 #include "util/u_pack_color.h"
28 #include "util/u_prim_restart.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
32 #include "v3d_context.h"
33 #include "v3d_resource.h"
35 #include "broadcom/compiler/v3d_compiler.h"
36 #include "broadcom/common/v3d_macros.h"
37 #include "broadcom/cle/v3dx_pack.h"
40 * Does the initial bining command list setup for drawing to a given FBO.
43 v3d_start_draw(struct v3d_context
*v3d
)
45 struct v3d_job
*job
= v3d
->job
;
50 /* Get space to emit our BCL state, using a branch to jump to a new BO
53 v3d_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
55 job
->submit
.bcl_start
= job
->bcl
.bo
->offset
;
56 v3d_job_add_bo(job
, job
->bcl
.bo
);
58 job
->tile_alloc
= v3d_bo_alloc(v3d
->screen
, 1024 * 1024, "tile_alloc");
59 uint32_t tsda_per_tile_size
= v3d
->screen
->devinfo
.ver
>= 40 ? 256 : 64;
60 job
->tile_state
= v3d_bo_alloc(v3d
->screen
,
67 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG
, config
) {
68 config
.width_in_pixels
= v3d
->framebuffer
.width
;
69 config
.height_in_pixels
= v3d
->framebuffer
.height
;
70 config
.number_of_render_targets
=
71 MAX2(v3d
->framebuffer
.nr_cbufs
, 1);
73 config
.multisample_mode_4x
= job
->msaa
;
75 config
.maximum_bpp_of_all_render_targets
= job
->internal_bpp
;
77 #else /* V3D_VERSION < 40 */
78 /* "Binning mode lists start with a Tile Binning Mode Configuration
81 * Part1 signals the end of binning config setup.
83 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG_PART2
, config
) {
84 config
.tile_allocation_memory_address
=
85 cl_address(job
->tile_alloc
, 0);
86 config
.tile_allocation_memory_size
= job
->tile_alloc
->size
;
89 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG_PART1
, config
) {
90 config
.tile_state_data_array_base_address
=
91 cl_address(job
->tile_state
, 0);
93 config
.width_in_tiles
= job
->draw_tiles_x
;
94 config
.height_in_tiles
= job
->draw_tiles_y
;
96 config
.number_of_render_targets
=
97 MAX2(v3d
->framebuffer
.nr_cbufs
, 1);
99 config
.multisample_mode_4x
= job
->msaa
;
101 config
.maximum_bpp_of_all_render_targets
= job
->internal_bpp
;
103 #endif /* V3D_VERSION < 40 */
105 /* There's definitely nothing in the VCD cache we want. */
106 cl_emit(&job
->bcl
, FLUSH_VCD_CACHE
, bin
);
108 /* Disable any leftover OQ state from another job. */
109 cl_emit(&job
->bcl
, OCCLUSION_QUERY_COUNTER
, counter
);
111 /* "Binning mode lists must have a Start Tile Binning item (6) after
112 * any prefix state data before the binning list proper starts."
114 cl_emit(&job
->bcl
, START_TILE_BINNING
, bin
);
116 job
->needs_flush
= true;
117 job
->draw_width
= v3d
->framebuffer
.width
;
118 job
->draw_height
= v3d
->framebuffer
.height
;
122 v3d_predraw_check_stage_inputs(struct pipe_context
*pctx
,
123 enum pipe_shader_type s
)
125 struct v3d_context
*v3d
= v3d_context(pctx
);
127 /* Flush writes to textures we're sampling. */
128 for (int i
= 0; i
< v3d
->tex
[s
].num_textures
; i
++) {
129 struct pipe_sampler_view
*view
= v3d
->tex
[s
].textures
[i
];
133 v3d_flush_jobs_writing_resource(v3d
, view
->texture
);
136 /* Flush writes to UBOs. */
137 foreach_bit(i
, v3d
->constbuf
[s
].enabled_mask
) {
138 struct pipe_constant_buffer
*cb
= &v3d
->constbuf
[s
].cb
[i
];
140 v3d_flush_jobs_writing_resource(v3d
, cb
->buffer
);
145 v3d_emit_gl_shader_state(struct v3d_context
*v3d
,
146 const struct pipe_draw_info
*info
)
148 struct v3d_job
*job
= v3d
->job
;
149 /* VC5_DIRTY_VTXSTATE */
150 struct v3d_vertex_stateobj
*vtx
= v3d
->vtx
;
151 /* VC5_DIRTY_VTXBUF */
152 struct v3d_vertexbuf_stateobj
*vertexbuf
= &v3d
->vertexbuf
;
154 /* Upload the uniforms to the indirect CL first */
155 struct v3d_cl_reloc fs_uniforms
=
156 v3d_write_uniforms(v3d
, v3d
->prog
.fs
,
157 PIPE_SHADER_FRAGMENT
);
158 struct v3d_cl_reloc vs_uniforms
=
159 v3d_write_uniforms(v3d
, v3d
->prog
.vs
,
161 struct v3d_cl_reloc cs_uniforms
=
162 v3d_write_uniforms(v3d
, v3d
->prog
.cs
,
165 /* See GFXH-930 workaround below */
166 uint32_t num_elements_to_emit
= MAX2(vtx
->num_elements
, 1);
167 uint32_t shader_rec_offset
=
168 v3d_cl_ensure_space(&job
->indirect
,
169 cl_packet_length(GL_SHADER_STATE_RECORD
) +
170 num_elements_to_emit
*
171 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD
),
174 cl_emit(&job
->indirect
, GL_SHADER_STATE_RECORD
, shader
) {
175 shader
.enable_clipping
= true;
176 /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
177 shader
.point_size_in_shaded_vertex_data
=
178 (info
->mode
== PIPE_PRIM_POINTS
&&
179 v3d
->rasterizer
->base
.point_size_per_vertex
);
181 /* Must be set if the shader modifies Z, discards, or modifies
182 * the sample mask. For any of these cases, the fragment
183 * shader needs to write the Z value (even just discards).
185 shader
.fragment_shader_does_z_writes
=
186 (v3d
->prog
.fs
->prog_data
.fs
->writes_z
||
187 v3d
->prog
.fs
->prog_data
.fs
->discard
);
189 shader
.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2
=
190 v3d
->prog
.fs
->prog_data
.fs
->uses_center_w
;
192 shader
.number_of_varyings_in_fragment_shader
=
193 v3d
->prog
.fs
->prog_data
.base
->num_inputs
;
195 shader
.coordinate_shader_propagate_nans
= true;
196 shader
.vertex_shader_propagate_nans
= true;
197 shader
.fragment_shader_propagate_nans
= true;
199 shader
.coordinate_shader_code_address
=
200 cl_address(v3d_resource(v3d
->prog
.cs
->resource
)->bo
,
201 v3d
->prog
.cs
->offset
);
202 shader
.vertex_shader_code_address
=
203 cl_address(v3d_resource(v3d
->prog
.vs
->resource
)->bo
,
204 v3d
->prog
.vs
->offset
);
205 shader
.fragment_shader_code_address
=
206 cl_address(v3d_resource(v3d
->prog
.fs
->resource
)->bo
,
207 v3d
->prog
.fs
->offset
);
209 /* XXX: Use combined input/output size flag in the common
212 shader
.coordinate_shader_has_separate_input_and_output_vpm_blocks
=
213 v3d
->prog
.cs
->prog_data
.vs
->separate_segments
;
214 shader
.vertex_shader_has_separate_input_and_output_vpm_blocks
=
215 v3d
->prog
.vs
->prog_data
.vs
->separate_segments
;
217 shader
.coordinate_shader_input_vpm_segment_size
=
218 v3d
->prog
.cs
->prog_data
.vs
->vpm_input_size
;
219 shader
.vertex_shader_input_vpm_segment_size
=
220 v3d
->prog
.vs
->prog_data
.vs
->vpm_input_size
;
222 shader
.coordinate_shader_output_vpm_segment_size
=
223 v3d
->prog
.cs
->prog_data
.vs
->vpm_output_size
;
224 shader
.vertex_shader_output_vpm_segment_size
=
225 v3d
->prog
.vs
->prog_data
.vs
->vpm_output_size
;
227 shader
.coordinate_shader_uniforms_address
= cs_uniforms
;
228 shader
.vertex_shader_uniforms_address
= vs_uniforms
;
229 shader
.fragment_shader_uniforms_address
= fs_uniforms
;
231 #if V3D_VERSION >= 41
232 shader
.min_coord_shader_input_segments_required_in_play
= 1;
233 shader
.min_vertex_shader_input_segments_required_in_play
= 1;
235 shader
.coordinate_shader_4_way_threadable
=
236 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 4;
237 shader
.vertex_shader_4_way_threadable
=
238 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 4;
239 shader
.fragment_shader_4_way_threadable
=
240 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 4;
242 shader
.coordinate_shader_start_in_final_thread_section
=
243 v3d
->prog
.cs
->prog_data
.vs
->base
.single_seg
;
244 shader
.vertex_shader_start_in_final_thread_section
=
245 v3d
->prog
.vs
->prog_data
.vs
->base
.single_seg
;
246 shader
.fragment_shader_start_in_final_thread_section
=
247 v3d
->prog
.fs
->prog_data
.fs
->base
.single_seg
;
249 shader
.coordinate_shader_4_way_threadable
=
250 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 4;
251 shader
.coordinate_shader_2_way_threadable
=
252 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 2;
253 shader
.vertex_shader_4_way_threadable
=
254 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 4;
255 shader
.vertex_shader_2_way_threadable
=
256 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 2;
257 shader
.fragment_shader_4_way_threadable
=
258 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 4;
259 shader
.fragment_shader_2_way_threadable
=
260 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 2;
263 shader
.vertex_id_read_by_coordinate_shader
=
264 v3d
->prog
.cs
->prog_data
.vs
->uses_vid
;
265 shader
.instance_id_read_by_coordinate_shader
=
266 v3d
->prog
.cs
->prog_data
.vs
->uses_iid
;
267 shader
.vertex_id_read_by_vertex_shader
=
268 v3d
->prog
.vs
->prog_data
.vs
->uses_vid
;
269 shader
.instance_id_read_by_vertex_shader
=
270 v3d
->prog
.vs
->prog_data
.vs
->uses_iid
;
272 shader
.address_of_default_attribute_values
=
273 cl_address(v3d_resource(vtx
->defaults
)->bo
,
274 vtx
->defaults_offset
);
277 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
278 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
279 struct pipe_vertex_buffer
*vb
=
280 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
281 struct v3d_resource
*rsc
= v3d_resource(vb
->buffer
.resource
);
283 const uint32_t size
=
284 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD
);
285 cl_emit_with_prepacked(&job
->indirect
,
286 GL_SHADER_STATE_ATTRIBUTE_RECORD
,
287 &vtx
->attrs
[i
* size
], attr
) {
288 attr
.stride
= vb
->stride
;
289 attr
.address
= cl_address(rsc
->bo
,
292 attr
.number_of_values_read_by_coordinate_shader
=
293 v3d
->prog
.cs
->prog_data
.vs
->vattr_sizes
[i
];
294 attr
.number_of_values_read_by_vertex_shader
=
295 v3d
->prog
.vs
->prog_data
.vs
->vattr_sizes
[i
];
296 #if V3D_VERSION >= 41
297 attr
.maximum_index
= 0xffffff;
300 STATIC_ASSERT(sizeof(vtx
->attrs
) >= VC5_MAX_ATTRIBUTES
* size
);
303 if (vtx
->num_elements
== 0) {
304 /* GFXH-930: At least one attribute must be enabled and read
305 * by CS and VS. If we have no attributes being consumed by
306 * the shader, set up a dummy to be loaded into the VPM.
308 cl_emit(&job
->indirect
, GL_SHADER_STATE_ATTRIBUTE_RECORD
, attr
) {
309 /* Valid address of data whose value will be unused. */
310 attr
.address
= cl_address(job
->indirect
.bo
, 0);
312 attr
.type
= ATTRIBUTE_FLOAT
;
316 attr
.number_of_values_read_by_coordinate_shader
= 1;
317 attr
.number_of_values_read_by_vertex_shader
= 1;
321 cl_emit(&job
->bcl
, VCM_CACHE_SIZE
, vcm
) {
322 vcm
.number_of_16_vertex_batches_for_binning
=
323 v3d
->prog
.cs
->prog_data
.vs
->vcm_cache_size
;
324 vcm
.number_of_16_vertex_batches_for_rendering
=
325 v3d
->prog
.vs
->prog_data
.vs
->vcm_cache_size
;
328 cl_emit(&job
->bcl
, GL_SHADER_STATE
, state
) {
329 state
.address
= cl_address(job
->indirect
.bo
, shader_rec_offset
);
330 state
.number_of_attribute_arrays
= num_elements_to_emit
;
333 v3d_bo_unreference(&cs_uniforms
.bo
);
334 v3d_bo_unreference(&vs_uniforms
.bo
);
335 v3d_bo_unreference(&fs_uniforms
.bo
);
337 job
->shader_rec_count
++;
341 * Computes the various transform feedback statistics, since they can't be
342 * recorded by CL packets.
345 v3d_tf_statistics_record(struct v3d_context
*v3d
,
346 const struct pipe_draw_info
*info
,
349 if (!v3d
->active_queries
)
352 uint32_t prims
= u_prims_for_vertices(info
->mode
, info
->count
);
353 v3d
->prims_generated
+= prims
;
356 /* XXX: Only count if we didn't overflow. */
357 v3d
->tf_prims_generated
+= prims
;
362 v3d_update_job_ez(struct v3d_context
*v3d
, struct v3d_job
*job
)
364 switch (v3d
->zsa
->ez_state
) {
365 case VC5_EZ_UNDECIDED
:
366 /* If the Z/S state didn't pick a direction but didn't
367 * disable, then go along with the current EZ state. This
368 * allows EZ optimization for Z func == EQUAL or NEVER.
374 /* If the Z/S state picked a direction, then it needs to match
375 * the current direction if we've decided on one.
377 if (job
->ez_state
== VC5_EZ_UNDECIDED
)
378 job
->ez_state
= v3d
->zsa
->ez_state
;
379 else if (job
->ez_state
!= v3d
->zsa
->ez_state
)
380 job
->ez_state
= VC5_EZ_DISABLED
;
383 case VC5_EZ_DISABLED
:
384 /* If the current Z/S state disables EZ because of a bad Z
385 * func or stencil operation, then we can't do any more EZ in
388 job
->ez_state
= VC5_EZ_DISABLED
;
392 /* If the FS affects the Z of the pixels, then it may update against
393 * the chosen EZ direction (though we could use
394 * ARB_conservative_depth's hints to avoid this)
396 if (v3d
->prog
.fs
->prog_data
.fs
->writes_z
) {
397 job
->ez_state
= VC5_EZ_DISABLED
;
400 if (job
->first_ez_state
== VC5_EZ_UNDECIDED
&&
401 (job
->ez_state
!= VC5_EZ_DISABLED
|| job
->draw_calls_queued
== 0))
402 job
->first_ez_state
= job
->ez_state
;
406 v3d_draw_vbo(struct pipe_context
*pctx
, const struct pipe_draw_info
*info
)
408 struct v3d_context
*v3d
= v3d_context(pctx
);
410 if (!info
->count_from_stream_output
&& !info
->indirect
&&
411 !info
->primitive_restart
&&
412 !u_trim_pipe_prim(info
->mode
, (unsigned*)&info
->count
))
415 /* Fall back for weird desktop GL primitive restart values. */
416 if (info
->primitive_restart
&&
420 switch (info
->index_size
) {
429 if (info
->restart_index
!= mask
) {
430 util_draw_vbo_without_prim_restart(pctx
, info
);
435 if (info
->mode
>= PIPE_PRIM_QUADS
) {
436 util_primconvert_save_rasterizer_state(v3d
->primconvert
, &v3d
->rasterizer
->base
);
437 util_primconvert_draw_vbo(v3d
->primconvert
, info
);
438 perf_debug("Fallback conversion for %d %s vertices\n",
439 info
->count
, u_prim_name(info
->mode
));
443 /* Before setting up the draw, flush anything writing to the textures
446 for (int s
= 0; s
< PIPE_SHADER_TYPES
; s
++)
447 v3d_predraw_check_stage_inputs(pctx
, s
);
449 struct v3d_job
*job
= v3d_get_job_for_fbo(v3d
);
451 /* If vertex texturing depends on the output of rendering, we need to
452 * ensure that that rendering is complete before we run a coordinate
453 * shader that depends on it.
455 * Given that doing that is unusual, for now we just block the binner
456 * on the last submitted render, rather than tracking the last
457 * rendering to each texture's BO.
459 if (v3d
->tex
[PIPE_SHADER_VERTEX
].num_textures
) {
460 perf_debug("Blocking binner on last render "
461 "due to vertex texturing.\n");
462 job
->submit
.in_sync_bcl
= v3d
->out_sync
;
465 /* Get space to emit our draw call into the BCL, using a branch to
466 * jump to a new BO if necessary.
468 v3d_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
470 if (v3d
->prim_mode
!= info
->mode
) {
471 v3d
->prim_mode
= info
->mode
;
472 v3d
->dirty
|= VC5_DIRTY_PRIM_MODE
;
476 v3d_update_compiled_shaders(v3d
, info
->mode
);
477 v3d_update_job_ez(v3d
, job
);
479 #if V3D_VERSION >= 41
480 v3d41_emit_state(pctx
);
482 v3d33_emit_state(pctx
);
485 if (v3d
->dirty
& (VC5_DIRTY_VTXBUF
|
487 VC5_DIRTY_PRIM_MODE
|
488 VC5_DIRTY_RASTERIZER
|
489 VC5_DIRTY_COMPILED_CS
|
490 VC5_DIRTY_COMPILED_VS
|
491 VC5_DIRTY_COMPILED_FS
|
492 v3d
->prog
.cs
->uniform_dirty_bits
|
493 v3d
->prog
.vs
->uniform_dirty_bits
|
494 v3d
->prog
.fs
->uniform_dirty_bits
)) {
495 v3d_emit_gl_shader_state(v3d
, info
);
500 /* The Base Vertex/Base Instance packet sets those values to nonzero
501 * for the next draw call only.
503 if (info
->index_bias
|| info
->start_instance
) {
504 cl_emit(&job
->bcl
, BASE_VERTEX_BASE_INSTANCE
, base
) {
505 base
.base_instance
= info
->start_instance
;
506 base
.base_vertex
= info
->index_bias
;
510 uint32_t prim_tf_enable
= 0;
512 /* V3D 3.x: The HW only processes transform feedback on primitives
515 if (v3d
->streamout
.num_targets
)
516 prim_tf_enable
= (V3D_PRIM_POINTS_TF
- V3D_PRIM_POINTS
);
519 v3d_tf_statistics_record(v3d
, info
, v3d
->streamout
.num_targets
);
521 /* Note that the primitive type fields match with OpenGL/gallium
522 * definitions, up to but not including QUADS.
524 if (info
->index_size
) {
525 uint32_t index_size
= info
->index_size
;
526 uint32_t offset
= info
->start
* index_size
;
527 struct pipe_resource
*prsc
;
528 if (info
->has_user_indices
) {
530 u_upload_data(v3d
->uploader
, 0,
531 info
->count
* info
->index_size
, 4,
535 prsc
= info
->index
.resource
;
537 struct v3d_resource
*rsc
= v3d_resource(prsc
);
539 #if V3D_VERSION >= 40
540 cl_emit(&job
->bcl
, INDEX_BUFFER_SETUP
, ib
) {
541 ib
.address
= cl_address(rsc
->bo
, 0);
542 ib
.size
= rsc
->bo
->size
;
546 if (info
->instance_count
> 1) {
547 cl_emit(&job
->bcl
, INDEXED_INSTANCED_PRIM_LIST
, prim
) {
548 prim
.index_type
= ffs(info
->index_size
) - 1;
549 #if V3D_VERSION >= 40
550 prim
.index_offset
= offset
;
551 #else /* V3D_VERSION < 40 */
552 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
553 prim
.address_of_indices_list
=
554 cl_address(rsc
->bo
, offset
);
555 #endif /* V3D_VERSION < 40 */
556 prim
.mode
= info
->mode
| prim_tf_enable
;
557 prim
.enable_primitive_restarts
= info
->primitive_restart
;
559 prim
.number_of_instances
= info
->instance_count
;
560 prim
.instance_length
= info
->count
;
563 cl_emit(&job
->bcl
, INDEXED_PRIM_LIST
, prim
) {
564 prim
.index_type
= ffs(info
->index_size
) - 1;
565 prim
.length
= info
->count
;
566 #if V3D_VERSION >= 40
567 prim
.index_offset
= offset
;
568 #else /* V3D_VERSION < 40 */
569 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
570 prim
.address_of_indices_list
=
571 cl_address(rsc
->bo
, offset
);
572 #endif /* V3D_VERSION < 40 */
573 prim
.mode
= info
->mode
| prim_tf_enable
;
574 prim
.enable_primitive_restarts
= info
->primitive_restart
;
578 job
->draw_calls_queued
++;
580 if (info
->has_user_indices
)
581 pipe_resource_reference(&prsc
, NULL
);
583 if (info
->instance_count
> 1) {
584 cl_emit(&job
->bcl
, VERTEX_ARRAY_INSTANCED_PRIMS
, prim
) {
585 prim
.mode
= info
->mode
| prim_tf_enable
;
586 prim
.index_of_first_vertex
= info
->start
;
587 prim
.number_of_instances
= info
->instance_count
;
588 prim
.instance_length
= info
->count
;
591 cl_emit(&job
->bcl
, VERTEX_ARRAY_PRIMS
, prim
) {
592 prim
.mode
= info
->mode
| prim_tf_enable
;
593 prim
.length
= info
->count
;
594 prim
.index_of_first_vertex
= info
->start
;
599 /* A flush is required in between a TF draw and any following TF specs
600 * packet, or the GPU may hang. Just flush each time for now.
602 if (v3d
->streamout
.num_targets
)
603 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT
, flush
);
605 job
->draw_calls_queued
++;
607 /* Increment the TF offsets by how many verts we wrote. XXX: This
608 * needs some clamping to the buffer size.
610 for (int i
= 0; i
< v3d
->streamout
.num_targets
; i
++)
611 v3d
->streamout
.offsets
[i
] += info
->count
;
613 if (v3d
->zsa
&& job
->zsbuf
&& v3d
->zsa
->base
.depth
.enabled
) {
614 struct v3d_resource
*rsc
= v3d_resource(job
->zsbuf
->texture
);
615 v3d_job_add_bo(job
, rsc
->bo
);
617 job
->load
|= PIPE_CLEAR_DEPTH
& ~job
->clear
;
618 if (v3d
->zsa
->base
.depth
.writemask
)
619 job
->store
|= PIPE_CLEAR_DEPTH
;
620 rsc
->initialized_buffers
= PIPE_CLEAR_DEPTH
;
623 if (v3d
->zsa
&& job
->zsbuf
&& v3d
->zsa
->base
.stencil
[0].enabled
) {
624 struct v3d_resource
*rsc
= v3d_resource(job
->zsbuf
->texture
);
625 if (rsc
->separate_stencil
)
626 rsc
= rsc
->separate_stencil
;
628 v3d_job_add_bo(job
, rsc
->bo
);
630 job
->load
|= PIPE_CLEAR_STENCIL
& ~job
->clear
;
631 if (v3d
->zsa
->base
.stencil
[0].writemask
||
632 v3d
->zsa
->base
.stencil
[1].writemask
) {
633 job
->store
|= PIPE_CLEAR_STENCIL
;
635 rsc
->initialized_buffers
|= PIPE_CLEAR_STENCIL
;
638 for (int i
= 0; i
< VC5_MAX_DRAW_BUFFERS
; i
++) {
639 uint32_t bit
= PIPE_CLEAR_COLOR0
<< i
;
640 int blend_rt
= v3d
->blend
->base
.independent_blend_enable
? i
: 0;
642 if (job
->store
& bit
|| !job
->cbufs
[i
])
644 struct v3d_resource
*rsc
= v3d_resource(job
->cbufs
[i
]->texture
);
646 job
->load
|= bit
& ~job
->clear
;
647 if (v3d
->blend
->base
.rt
[blend_rt
].colormask
)
649 v3d_job_add_bo(job
, rsc
->bo
);
652 if (job
->referenced_size
> 768 * 1024 * 1024) {
653 perf_debug("Flushing job with %dkb to try to free up memory\n",
654 job
->referenced_size
/ 1024);
658 if (V3D_DEBUG
& V3D_DEBUG_ALWAYS_FLUSH
)
663 * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
666 v3d_draw_clear(struct v3d_context
*v3d
,
668 const union pipe_color_union
*color
,
669 double depth
, unsigned stencil
)
671 static const union pipe_color_union dummy_color
= {};
673 /* The blitter util dereferences the color regardless, even though the
674 * gallium clear API may not pass one in when only Z/S are cleared.
677 color
= &dummy_color
;
679 v3d_blitter_save(v3d
);
680 util_blitter_clear(v3d
->blitter
,
681 v3d
->framebuffer
.width
,
682 v3d
->framebuffer
.height
,
683 util_framebuffer_get_num_layers(&v3d
->framebuffer
),
684 buffers
, color
, depth
, stencil
);
688 * Attempts to perform the GL clear by using the TLB's fast clear at the start
692 v3d_tlb_clear(struct v3d_job
*job
, unsigned buffers
,
693 const union pipe_color_union
*color
,
694 double depth
, unsigned stencil
)
696 struct v3d_context
*v3d
= job
->v3d
;
698 if (job
->draw_calls_queued
) {
699 /* If anything in the CL has drawn using the buffer, then the
700 * TLB clear we're trying to add now would happen before that
703 buffers
&= ~(job
->load
| job
->store
);
706 /* GFXH-1461: If we were to emit a load of just depth or just stencil,
707 * then the clear for the other may get lost. We need to decide now
708 * if it would be possible to need to emit a load of just one after
709 * we've set up our TLB clears.
711 if (buffers
& PIPE_CLEAR_DEPTHSTENCIL
&&
712 (buffers
& PIPE_CLEAR_DEPTHSTENCIL
) != PIPE_CLEAR_DEPTHSTENCIL
&&
714 util_format_is_depth_and_stencil(job
->zsbuf
->texture
->format
)) {
715 buffers
&= ~PIPE_CLEAR_DEPTHSTENCIL
;
718 for (int i
= 0; i
< VC5_MAX_DRAW_BUFFERS
; i
++) {
719 uint32_t bit
= PIPE_CLEAR_COLOR0
<< i
;
720 if (!(buffers
& bit
))
723 struct pipe_surface
*psurf
= v3d
->framebuffer
.cbufs
[i
];
724 struct v3d_surface
*surf
= v3d_surface(psurf
);
725 struct v3d_resource
*rsc
= v3d_resource(psurf
->texture
);
728 uint32_t internal_size
= 4 << surf
->internal_bpp
;
730 static union pipe_color_union swapped_color
;
731 if (v3d
->swap_color_rb
& (1 << i
)) {
732 swapped_color
.f
[0] = color
->f
[2];
733 swapped_color
.f
[1] = color
->f
[1];
734 swapped_color
.f
[2] = color
->f
[0];
735 swapped_color
.f
[3] = color
->f
[3];
736 color
= &swapped_color
;
739 switch (surf
->internal_type
) {
740 case V3D_INTERNAL_TYPE_8
:
741 util_pack_color(color
->f
, PIPE_FORMAT_R8G8B8A8_UNORM
,
743 memcpy(job
->clear_color
[i
], uc
.ui
, internal_size
);
745 case V3D_INTERNAL_TYPE_8I
:
746 case V3D_INTERNAL_TYPE_8UI
:
747 job
->clear_color
[i
][0] = ((color
->ui
[0] & 0xff) |
748 (color
->ui
[1] & 0xff) << 8 |
749 (color
->ui
[2] & 0xff) << 16 |
750 (color
->ui
[3] & 0xff) << 24);
752 case V3D_INTERNAL_TYPE_16F
:
753 util_pack_color(color
->f
, PIPE_FORMAT_R16G16B16A16_FLOAT
,
755 memcpy(job
->clear_color
[i
], uc
.ui
, internal_size
);
757 case V3D_INTERNAL_TYPE_16I
:
758 case V3D_INTERNAL_TYPE_16UI
:
759 job
->clear_color
[i
][0] = ((color
->ui
[0] & 0xffff) |
761 job
->clear_color
[i
][1] = ((color
->ui
[2] & 0xffff) |
764 case V3D_INTERNAL_TYPE_32F
:
765 case V3D_INTERNAL_TYPE_32I
:
766 case V3D_INTERNAL_TYPE_32UI
:
767 memcpy(job
->clear_color
[i
], color
->ui
, internal_size
);
771 rsc
->initialized_buffers
|= bit
;
774 unsigned zsclear
= buffers
& PIPE_CLEAR_DEPTHSTENCIL
;
776 struct v3d_resource
*rsc
=
777 v3d_resource(v3d
->framebuffer
.zsbuf
->texture
);
779 if (zsclear
& PIPE_CLEAR_DEPTH
)
780 job
->clear_z
= depth
;
781 if (zsclear
& PIPE_CLEAR_STENCIL
)
782 job
->clear_s
= stencil
;
784 rsc
->initialized_buffers
|= zsclear
;
789 job
->draw_max_x
= v3d
->framebuffer
.width
;
790 job
->draw_max_y
= v3d
->framebuffer
.height
;
791 job
->clear
|= buffers
;
792 job
->store
|= buffers
;
/**
 * Implements the pipe_context::clear hook (installed by draw_init).
 *
 * Tries the TLB fast clear first; v3d_tlb_clear() returns the mask of
 * buffers it handled, and whatever remains is cleared by drawing.
 *
 * NOTE(review): the "if (buffers)" guard before the draw-based fallback was
 * missing from the listing and is reconstructed from the gap -- confirm
 * against the upstream file.
 */
static void
v3d_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_job *job = v3d_get_job_for_fbo(v3d);

        buffers &= ~v3d_tlb_clear(job, buffers, color, depth, stencil);

        if (buffers)
                v3d_draw_clear(v3d, buffers, color, depth, stencil);
}
/**
 * Placeholder for the pipe_context::clear_render_target hook.
 *
 * Not implemented: all arguments are ignored and a notice is printed to
 * stderr.
 */
static void
v3d_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fprintf(stderr, "unimpl: clear RT\n");
}
/**
 * Placeholder for the pipe_context::clear_depth_stencil hook.
 *
 * Not implemented: all arguments are ignored and a notice is printed to
 * stderr.
 */
static void
v3d_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fprintf(stderr, "unimpl: clear DS\n");
}
831 v3dX(draw_init
)(struct pipe_context
*pctx
)
833 pctx
->draw_vbo
= v3d_draw_vbo
;
834 pctx
->clear
= v3d_clear
;
835 pctx
->clear_render_target
= v3d_clear_render_target
;
836 pctx
->clear_depth_stencil
= v3d_clear_depth_stencil
;