2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "util/u_blitter.h"
25 #include "util/u_prim.h"
26 #include "util/u_format.h"
27 #include "util/u_pack_color.h"
28 #include "util/u_prim_restart.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
32 #include "v3d_context.h"
33 #include "v3d_resource.h"
35 #include "broadcom/compiler/v3d_compiler.h"
36 #include "broadcom/common/v3d_macros.h"
37 #include "broadcom/cle/v3dx_pack.h"
40 * Does the initial binning command list setup for drawing to a given FBO.
43 v3d_start_draw(struct v3d_context
*v3d
)
45 struct v3d_job
*job
= v3d
->job
;
50 /* Get space to emit our BCL state, using a branch to jump to a new BO
53 v3d_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
55 job
->submit
.bcl_start
= job
->bcl
.bo
->offset
;
56 v3d_job_add_bo(job
, job
->bcl
.bo
);
58 /* The PTB will request the tile alloc initial size per tile at start
61 uint32_t tile_alloc_size
= (job
->draw_tiles_x
*
62 job
->draw_tiles_y
) * 64;
63 /* The PTB allocates in aligned 4k chunks after the initial setup. */
64 tile_alloc_size
= align(tile_alloc_size
, 4096);
66 /* Include the first two chunk allocations that the PTB does so that
67 * we definitely clear the OOM condition before triggering one (the HW
68 * won't trigger OOM during the first allocations).
70 tile_alloc_size
+= 8192;
72 /* For performance, allocate some extra initial memory after the PTB's
73 * minimal allocations, so that we hopefully don't have to block the
74 * GPU on the kernel handling an OOM signal.
76 tile_alloc_size
+= 512 * 1024;
78 job
->tile_alloc
= v3d_bo_alloc(v3d
->screen
, tile_alloc_size
,
80 uint32_t tsda_per_tile_size
= v3d
->screen
->devinfo
.ver
>= 40 ? 256 : 64;
81 job
->tile_state
= v3d_bo_alloc(v3d
->screen
,
88 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG
, config
) {
89 config
.width_in_pixels
= v3d
->framebuffer
.width
;
90 config
.height_in_pixels
= v3d
->framebuffer
.height
;
91 config
.number_of_render_targets
=
92 MAX2(v3d
->framebuffer
.nr_cbufs
, 1);
94 config
.multisample_mode_4x
= job
->msaa
;
96 config
.maximum_bpp_of_all_render_targets
= job
->internal_bpp
;
98 #else /* V3D_VERSION < 40 */
99 /* "Binning mode lists start with a Tile Binning Mode Configuration
102 * Part1 signals the end of binning config setup.
104 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG_PART2
, config
) {
105 config
.tile_allocation_memory_address
=
106 cl_address(job
->tile_alloc
, 0);
107 config
.tile_allocation_memory_size
= job
->tile_alloc
->size
;
110 cl_emit(&job
->bcl
, TILE_BINNING_MODE_CFG_PART1
, config
) {
111 config
.tile_state_data_array_base_address
=
112 cl_address(job
->tile_state
, 0);
114 config
.width_in_tiles
= job
->draw_tiles_x
;
115 config
.height_in_tiles
= job
->draw_tiles_y
;
117 config
.number_of_render_targets
=
118 MAX2(v3d
->framebuffer
.nr_cbufs
, 1);
120 config
.multisample_mode_4x
= job
->msaa
;
122 config
.maximum_bpp_of_all_render_targets
= job
->internal_bpp
;
124 #endif /* V3D_VERSION < 40 */
126 /* There's definitely nothing in the VCD cache we want. */
127 cl_emit(&job
->bcl
, FLUSH_VCD_CACHE
, bin
);
129 /* Disable any leftover OQ state from another job. */
130 cl_emit(&job
->bcl
, OCCLUSION_QUERY_COUNTER
, counter
);
132 /* "Binning mode lists must have a Start Tile Binning item (6) after
133 * any prefix state data before the binning list proper starts."
135 cl_emit(&job
->bcl
, START_TILE_BINNING
, bin
);
137 job
->needs_flush
= true;
138 job
->draw_width
= v3d
->framebuffer
.width
;
139 job
->draw_height
= v3d
->framebuffer
.height
;
143 v3d_predraw_check_stage_inputs(struct pipe_context
*pctx
,
144 enum pipe_shader_type s
)
146 struct v3d_context
*v3d
= v3d_context(pctx
);
148 /* Flush writes to textures we're sampling. */
149 for (int i
= 0; i
< v3d
->tex
[s
].num_textures
; i
++) {
150 struct pipe_sampler_view
*pview
= v3d
->tex
[s
].textures
[i
];
153 struct v3d_sampler_view
*view
= v3d_sampler_view(pview
);
155 if (view
->texture
!= view
->base
.texture
&&
156 view
->base
.format
!= PIPE_FORMAT_X32_S8X24_UINT
)
157 v3d_update_shadow_texture(pctx
, &view
->base
);
159 v3d_flush_jobs_writing_resource(v3d
, view
->texture
, false);
162 /* Flush writes to UBOs. */
163 foreach_bit(i
, v3d
->constbuf
[s
].enabled_mask
) {
164 struct pipe_constant_buffer
*cb
= &v3d
->constbuf
[s
].cb
[i
];
166 v3d_flush_jobs_writing_resource(v3d
, cb
->buffer
, false);
169 /* Flush writes to our image views */
170 foreach_bit(i
, v3d
->shaderimg
[s
].enabled_mask
) {
171 struct v3d_image_view
*view
= &v3d
->shaderimg
[s
].si
[i
];
173 v3d_flush_jobs_writing_resource(v3d
, view
->base
.resource
,
177 /* Flush writes to our vertex buffers (i.e. from transform feedback) */
178 if (s
== PIPE_SHADER_VERTEX
) {
179 foreach_bit(i
, v3d
->vertexbuf
.enabled_mask
) {
180 struct pipe_vertex_buffer
*vb
= &v3d
->vertexbuf
.vb
[i
];
182 v3d_flush_jobs_writing_resource(v3d
, vb
->buffer
.resource
,
189 v3d_predraw_check_outputs(struct pipe_context
*pctx
)
191 struct v3d_context
*v3d
= v3d_context(pctx
);
193 /* Flush jobs reading from TF buffers that we are about to write. */
194 if (v3d_transform_feedback_enabled(v3d
)) {
195 struct v3d_streamout_stateobj
*so
= &v3d
->streamout
;
197 for (int i
= 0; i
< so
->num_targets
; i
++) {
201 const struct pipe_stream_output_target
*target
=
203 v3d_flush_jobs_reading_resource(v3d
, target
->buffer
);
209 * Checks if the state for the current draw reads a particular resource in
210 * in the given shader stage.
213 v3d_state_reads_resource(struct v3d_context
*v3d
,
214 struct pipe_resource
*prsc
,
215 enum pipe_shader_type s
)
217 struct v3d_resource
*rsc
= v3d_resource(prsc
);
220 if (s
== PIPE_SHADER_VERTEX
) {
221 foreach_bit(i
, v3d
->vertexbuf
.enabled_mask
) {
222 struct pipe_vertex_buffer
*vb
= &v3d
->vertexbuf
.vb
[i
];
223 if (!vb
->buffer
.resource
)
226 struct v3d_resource
*vb_rsc
=
227 v3d_resource(vb
->buffer
.resource
);
228 if (rsc
->bo
== vb_rsc
->bo
)
233 /* Constant buffers */
234 foreach_bit(i
, v3d
->constbuf
[s
].enabled_mask
) {
235 struct pipe_constant_buffer
*cb
= &v3d
->constbuf
[s
].cb
[i
];
239 struct v3d_resource
*cb_rsc
= v3d_resource(cb
->buffer
);
240 if (rsc
->bo
== cb_rsc
->bo
)
244 /* Shader storage buffers */
245 foreach_bit(i
, v3d
->ssbo
[s
].enabled_mask
) {
246 struct pipe_shader_buffer
*sb
= &v3d
->ssbo
[s
].sb
[i
];
250 struct v3d_resource
*sb_rsc
= v3d_resource(sb
->buffer
);
251 if (rsc
->bo
== sb_rsc
->bo
)
256 for (int i
= 0; i
< v3d
->tex
[s
].num_textures
; i
++) {
257 struct pipe_sampler_view
*pview
= v3d
->tex
[s
].textures
[i
];
261 struct v3d_sampler_view
*view
= v3d_sampler_view(pview
);
262 struct v3d_resource
*v_rsc
= v3d_resource(view
->texture
);
263 if (rsc
->bo
== v_rsc
->bo
)
271 v3d_emit_wait_for_tf(struct v3d_job
*job
)
273 /* XXX: we might be able to skip this in some cases, for now we
276 cl_emit(&job
->bcl
, FLUSH_TRANSFORM_FEEDBACK_DATA
, flush
);
278 cl_emit(&job
->bcl
, WAIT_FOR_TRANSFORM_FEEDBACK
, wait
) {
279 /* XXX: Wait for all outstanding writes... maybe we can do
280 * better in some cases.
282 wait
.block_count
= 255;
285 /* We have just flushed all our outstanding TF work in this job so make
286 * sure we don't emit TF flushes again for any of it again.
288 _mesa_set_clear(job
->tf_write_prscs
, NULL
);
292 v3d_emit_wait_for_tf_if_needed(struct v3d_context
*v3d
, struct v3d_job
*job
)
294 if (!job
->tf_enabled
)
297 set_foreach(job
->tf_write_prscs
, entry
) {
298 struct pipe_resource
*prsc
= (struct pipe_resource
*)entry
->key
;
299 for (int s
= 0; s
< PIPE_SHADER_COMPUTE
; s
++) {
300 /* Fragment shaders can only start executing after all
301 * binning (and thus TF) is complete.
303 * XXX: For VS/GS/TES, if the binning shader does not
304 * read the resource then we could also avoid emitting
307 if (s
== PIPE_SHADER_FRAGMENT
)
310 if (v3d_state_reads_resource(v3d
, prsc
, s
)) {
311 v3d_emit_wait_for_tf(job
);
319 v3d_emit_gl_shader_state(struct v3d_context
*v3d
,
320 const struct pipe_draw_info
*info
)
322 struct v3d_job
*job
= v3d
->job
;
323 /* VC5_DIRTY_VTXSTATE */
324 struct v3d_vertex_stateobj
*vtx
= v3d
->vtx
;
325 /* VC5_DIRTY_VTXBUF */
326 struct v3d_vertexbuf_stateobj
*vertexbuf
= &v3d
->vertexbuf
;
328 /* Upload the uniforms to the indirect CL first */
329 struct v3d_cl_reloc fs_uniforms
=
330 v3d_write_uniforms(v3d
, v3d
->prog
.fs
,
331 PIPE_SHADER_FRAGMENT
);
332 struct v3d_cl_reloc vs_uniforms
=
333 v3d_write_uniforms(v3d
, v3d
->prog
.vs
,
335 struct v3d_cl_reloc cs_uniforms
=
336 v3d_write_uniforms(v3d
, v3d
->prog
.cs
,
339 /* See GFXH-930 workaround below */
340 uint32_t num_elements_to_emit
= MAX2(vtx
->num_elements
, 1);
341 uint32_t shader_rec_offset
=
342 v3d_cl_ensure_space(&job
->indirect
,
343 cl_packet_length(GL_SHADER_STATE_RECORD
) +
344 num_elements_to_emit
*
345 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD
),
348 /* XXX perf: We should move most of the SHADER_STATE_RECORD setup to
349 * compile time, so that we mostly just have to OR the VS and FS
350 * records together at draw time.
352 cl_emit(&job
->indirect
, GL_SHADER_STATE_RECORD
, shader
) {
353 shader
.enable_clipping
= true;
354 /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
355 shader
.point_size_in_shaded_vertex_data
=
356 (info
->mode
== PIPE_PRIM_POINTS
&&
357 v3d
->rasterizer
->base
.point_size_per_vertex
);
359 /* Must be set if the shader modifies Z, discards, or modifies
360 * the sample mask. For any of these cases, the fragment
361 * shader needs to write the Z value (even just discards).
363 shader
.fragment_shader_does_z_writes
=
364 v3d
->prog
.fs
->prog_data
.fs
->writes_z
;
365 /* Set if the EZ test must be disabled (due to shader side
366 * effects and the early_z flag not being present in the
369 shader
.turn_off_early_z_test
=
370 v3d
->prog
.fs
->prog_data
.fs
->disable_ez
;
372 shader
.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2
=
373 v3d
->prog
.fs
->prog_data
.fs
->uses_center_w
;
375 #if V3D_VERSION >= 40
376 shader
.do_scoreboard_wait_on_first_thread_switch
=
377 v3d
->prog
.fs
->prog_data
.fs
->lock_scoreboard_on_first_thrsw
;
378 shader
.disable_implicit_point_line_varyings
=
379 !v3d
->prog
.fs
->prog_data
.fs
->uses_implicit_point_line_varyings
;
382 shader
.number_of_varyings_in_fragment_shader
=
383 v3d
->prog
.fs
->prog_data
.fs
->num_inputs
;
385 shader
.coordinate_shader_propagate_nans
= true;
386 shader
.vertex_shader_propagate_nans
= true;
387 shader
.fragment_shader_propagate_nans
= true;
389 shader
.coordinate_shader_code_address
=
390 cl_address(v3d_resource(v3d
->prog
.cs
->resource
)->bo
,
391 v3d
->prog
.cs
->offset
);
392 shader
.vertex_shader_code_address
=
393 cl_address(v3d_resource(v3d
->prog
.vs
->resource
)->bo
,
394 v3d
->prog
.vs
->offset
);
395 shader
.fragment_shader_code_address
=
396 cl_address(v3d_resource(v3d
->prog
.fs
->resource
)->bo
,
397 v3d
->prog
.fs
->offset
);
399 /* XXX: Use combined input/output size flag in the common
402 shader
.coordinate_shader_has_separate_input_and_output_vpm_blocks
=
403 v3d
->prog
.cs
->prog_data
.vs
->separate_segments
;
404 shader
.vertex_shader_has_separate_input_and_output_vpm_blocks
=
405 v3d
->prog
.vs
->prog_data
.vs
->separate_segments
;
407 shader
.coordinate_shader_input_vpm_segment_size
=
408 v3d
->prog
.cs
->prog_data
.vs
->separate_segments
?
409 v3d
->prog
.cs
->prog_data
.vs
->vpm_input_size
: 1;
410 shader
.vertex_shader_input_vpm_segment_size
=
411 v3d
->prog
.vs
->prog_data
.vs
->separate_segments
?
412 v3d
->prog
.vs
->prog_data
.vs
->vpm_input_size
: 1;
414 shader
.coordinate_shader_output_vpm_segment_size
=
415 v3d
->prog
.cs
->prog_data
.vs
->vpm_output_size
;
416 shader
.vertex_shader_output_vpm_segment_size
=
417 v3d
->prog
.vs
->prog_data
.vs
->vpm_output_size
;
419 shader
.coordinate_shader_uniforms_address
= cs_uniforms
;
420 shader
.vertex_shader_uniforms_address
= vs_uniforms
;
421 shader
.fragment_shader_uniforms_address
= fs_uniforms
;
423 #if V3D_VERSION >= 41
424 shader
.min_coord_shader_input_segments_required_in_play
= 1;
425 shader
.min_vertex_shader_input_segments_required_in_play
= 1;
427 shader
.coordinate_shader_4_way_threadable
=
428 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 4;
429 shader
.vertex_shader_4_way_threadable
=
430 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 4;
431 shader
.fragment_shader_4_way_threadable
=
432 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 4;
434 shader
.coordinate_shader_start_in_final_thread_section
=
435 v3d
->prog
.cs
->prog_data
.vs
->base
.single_seg
;
436 shader
.vertex_shader_start_in_final_thread_section
=
437 v3d
->prog
.vs
->prog_data
.vs
->base
.single_seg
;
438 shader
.fragment_shader_start_in_final_thread_section
=
439 v3d
->prog
.fs
->prog_data
.fs
->base
.single_seg
;
441 shader
.coordinate_shader_4_way_threadable
=
442 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 4;
443 shader
.coordinate_shader_2_way_threadable
=
444 v3d
->prog
.cs
->prog_data
.vs
->base
.threads
== 2;
445 shader
.vertex_shader_4_way_threadable
=
446 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 4;
447 shader
.vertex_shader_2_way_threadable
=
448 v3d
->prog
.vs
->prog_data
.vs
->base
.threads
== 2;
449 shader
.fragment_shader_4_way_threadable
=
450 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 4;
451 shader
.fragment_shader_2_way_threadable
=
452 v3d
->prog
.fs
->prog_data
.fs
->base
.threads
== 2;
455 shader
.vertex_id_read_by_coordinate_shader
=
456 v3d
->prog
.cs
->prog_data
.vs
->uses_vid
;
457 shader
.instance_id_read_by_coordinate_shader
=
458 v3d
->prog
.cs
->prog_data
.vs
->uses_iid
;
459 shader
.vertex_id_read_by_vertex_shader
=
460 v3d
->prog
.vs
->prog_data
.vs
->uses_vid
;
461 shader
.instance_id_read_by_vertex_shader
=
462 v3d
->prog
.vs
->prog_data
.vs
->uses_iid
;
464 shader
.address_of_default_attribute_values
=
465 cl_address(v3d_resource(vtx
->defaults
)->bo
,
466 vtx
->defaults_offset
);
469 bool cs_loaded_any
= false;
470 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
471 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
472 struct pipe_vertex_buffer
*vb
=
473 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
474 struct v3d_resource
*rsc
= v3d_resource(vb
->buffer
.resource
);
476 const uint32_t size
=
477 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD
);
478 cl_emit_with_prepacked(&job
->indirect
,
479 GL_SHADER_STATE_ATTRIBUTE_RECORD
,
480 &vtx
->attrs
[i
* size
], attr
) {
481 attr
.stride
= vb
->stride
;
482 attr
.address
= cl_address(rsc
->bo
,
485 attr
.number_of_values_read_by_coordinate_shader
=
486 v3d
->prog
.cs
->prog_data
.vs
->vattr_sizes
[i
];
487 attr
.number_of_values_read_by_vertex_shader
=
488 v3d
->prog
.vs
->prog_data
.vs
->vattr_sizes
[i
];
490 /* GFXH-930: At least one attribute must be enabled
491 * and read by CS and VS. If we have attributes being
492 * consumed by the VS but not the CS, then set up a
493 * dummy load of the last attribute into the CS's VPM
494 * inputs. (Since CS is just dead-code-elimination
495 * compared to VS, we can't have CS loading but not
498 if (v3d
->prog
.cs
->prog_data
.vs
->vattr_sizes
[i
])
499 cs_loaded_any
= true;
500 if (i
== vtx
->num_elements
- 1 && !cs_loaded_any
) {
501 attr
.number_of_values_read_by_coordinate_shader
= 1;
503 #if V3D_VERSION >= 41
504 attr
.maximum_index
= 0xffffff;
507 STATIC_ASSERT(sizeof(vtx
->attrs
) >= V3D_MAX_VS_INPUTS
/ 4 * size
);
510 if (vtx
->num_elements
== 0) {
511 /* GFXH-930: At least one attribute must be enabled and read
512 * by CS and VS. If we have no attributes being consumed by
513 * the shader, set up a dummy to be loaded into the VPM.
515 cl_emit(&job
->indirect
, GL_SHADER_STATE_ATTRIBUTE_RECORD
, attr
) {
516 /* Valid address of data whose value will be unused. */
517 attr
.address
= cl_address(job
->indirect
.bo
, 0);
519 attr
.type
= ATTRIBUTE_FLOAT
;
523 attr
.number_of_values_read_by_coordinate_shader
= 1;
524 attr
.number_of_values_read_by_vertex_shader
= 1;
528 cl_emit(&job
->bcl
, VCM_CACHE_SIZE
, vcm
) {
529 vcm
.number_of_16_vertex_batches_for_binning
=
530 v3d
->prog
.cs
->prog_data
.vs
->vcm_cache_size
;
531 vcm
.number_of_16_vertex_batches_for_rendering
=
532 v3d
->prog
.vs
->prog_data
.vs
->vcm_cache_size
;
535 cl_emit(&job
->bcl
, GL_SHADER_STATE
, state
) {
536 state
.address
= cl_address(job
->indirect
.bo
, shader_rec_offset
);
537 state
.number_of_attribute_arrays
= num_elements_to_emit
;
540 v3d_bo_unreference(&cs_uniforms
.bo
);
541 v3d_bo_unreference(&vs_uniforms
.bo
);
542 v3d_bo_unreference(&fs_uniforms
.bo
);
544 job
->shader_rec_count
++;
548 * Computes the various transform feedback statistics, since they can't be
549 * recorded by CL packets.
552 v3d_tf_statistics_record(struct v3d_context
*v3d
,
553 const struct pipe_draw_info
*info
,
556 if (!v3d
->active_queries
)
559 uint32_t prims
= u_prims_for_vertices(info
->mode
, info
->count
);
560 v3d
->prims_generated
+= prims
;
563 /* XXX: Only count if we didn't overflow. */
564 v3d
->tf_prims_generated
+= prims
;
569 v3d_update_job_ez(struct v3d_context
*v3d
, struct v3d_job
*job
)
571 switch (v3d
->zsa
->ez_state
) {
572 case VC5_EZ_UNDECIDED
:
573 /* If the Z/S state didn't pick a direction but didn't
574 * disable, then go along with the current EZ state. This
575 * allows EZ optimization for Z func == EQUAL or NEVER.
581 /* If the Z/S state picked a direction, then it needs to match
582 * the current direction if we've decided on one.
584 if (job
->ez_state
== VC5_EZ_UNDECIDED
)
585 job
->ez_state
= v3d
->zsa
->ez_state
;
586 else if (job
->ez_state
!= v3d
->zsa
->ez_state
)
587 job
->ez_state
= VC5_EZ_DISABLED
;
590 case VC5_EZ_DISABLED
:
591 /* If the current Z/S state disables EZ because of a bad Z
592 * func or stencil operation, then we can't do any more EZ in
595 job
->ez_state
= VC5_EZ_DISABLED
;
599 /* If the FS affects the Z of the pixels, then it may update against
600 * the chosen EZ direction (though we could use
601 * ARB_conservative_depth's hints to avoid this)
603 if (v3d
->prog
.fs
->prog_data
.fs
->writes_z
) {
604 job
->ez_state
= VC5_EZ_DISABLED
;
607 if (job
->first_ez_state
== VC5_EZ_UNDECIDED
&&
608 (job
->ez_state
!= VC5_EZ_DISABLED
|| job
->draw_calls_queued
== 0))
609 job
->first_ez_state
= job
->ez_state
;
613 v3d_draw_vbo(struct pipe_context
*pctx
, const struct pipe_draw_info
*info
)
615 struct v3d_context
*v3d
= v3d_context(pctx
);
617 if (!info
->count_from_stream_output
&& !info
->indirect
&&
618 !info
->primitive_restart
&&
619 !u_trim_pipe_prim(info
->mode
, (unsigned*)&info
->count
))
622 /* Fall back for weird desktop GL primitive restart values. */
623 if (info
->primitive_restart
&&
627 switch (info
->index_size
) {
636 if (info
->restart_index
!= mask
) {
637 util_draw_vbo_without_prim_restart(pctx
, info
);
642 if (info
->mode
>= PIPE_PRIM_QUADS
) {
643 util_primconvert_save_rasterizer_state(v3d
->primconvert
, &v3d
->rasterizer
->base
);
644 util_primconvert_draw_vbo(v3d
->primconvert
, info
);
645 perf_debug("Fallback conversion for %d %s vertices\n",
646 info
->count
, u_prim_name(info
->mode
));
650 /* Before setting up the draw, flush anything writing to the resources
651 * that we read from or reading from resources we write to.
653 for (int s
= 0; s
< PIPE_SHADER_COMPUTE
; s
++)
654 v3d_predraw_check_stage_inputs(pctx
, s
);
656 if (info
->indirect
) {
657 v3d_flush_jobs_writing_resource(v3d
, info
->indirect
->buffer
,
661 v3d_predraw_check_outputs(pctx
);
663 struct v3d_job
*job
= v3d_get_job_for_fbo(v3d
);
665 /* If vertex texturing depends on the output of rendering, we need to
666 * ensure that that rendering is complete before we run a coordinate
667 * shader that depends on it.
669 * Given that doing that is unusual, for now we just block the binner
670 * on the last submitted render, rather than tracking the last
671 * rendering to each texture's BO.
673 if (v3d
->tex
[PIPE_SHADER_VERTEX
].num_textures
|| info
->indirect
) {
674 perf_debug("Blocking binner on last render "
675 "due to vertex texturing or indirect drawing.\n");
676 job
->submit
.in_sync_bcl
= v3d
->out_sync
;
679 /* Mark SSBOs as being written. We don't actually know which ones are
680 * read vs written, so just assume the worst
682 for (int s
= 0; s
< PIPE_SHADER_COMPUTE
; s
++) {
683 foreach_bit(i
, v3d
->ssbo
[s
].enabled_mask
) {
684 v3d_job_add_write_resource(job
,
685 v3d
->ssbo
[s
].sb
[i
].buffer
);
686 job
->tmu_dirty_rcl
= true;
689 foreach_bit(i
, v3d
->shaderimg
[s
].enabled_mask
) {
690 v3d_job_add_write_resource(job
,
691 v3d
->shaderimg
[s
].si
[i
].base
.resource
);
692 job
->tmu_dirty_rcl
= true;
696 /* Get space to emit our draw call into the BCL, using a branch to
697 * jump to a new BO if necessary.
699 v3d_cl_ensure_space_with_branch(&job
->bcl
, 256 /* XXX */);
701 if (v3d
->prim_mode
!= info
->mode
) {
702 v3d
->prim_mode
= info
->mode
;
703 v3d
->dirty
|= VC5_DIRTY_PRIM_MODE
;
707 v3d_update_compiled_shaders(v3d
, info
->mode
);
708 v3d_update_job_ez(v3d
, job
);
710 /* If this job was writing to transform feedback buffers before this
711 * draw and we are reading from them here, then we need to wait for TF
712 * to complete before we emit this draw.
714 * Notice this check needs to happen before we emit state for the
715 * current draw call, where we update job->tf_enabled, so we can ensure
716 * that we only check TF writes for prior draws.
718 v3d_emit_wait_for_tf_if_needed(v3d
, job
);
720 #if V3D_VERSION >= 41
721 v3d41_emit_state(pctx
);
723 v3d33_emit_state(pctx
);
726 if (v3d
->dirty
& (VC5_DIRTY_VTXBUF
|
728 VC5_DIRTY_PRIM_MODE
|
729 VC5_DIRTY_RASTERIZER
|
730 VC5_DIRTY_COMPILED_CS
|
731 VC5_DIRTY_COMPILED_VS
|
732 VC5_DIRTY_COMPILED_FS
|
733 v3d
->prog
.cs
->uniform_dirty_bits
|
734 v3d
->prog
.vs
->uniform_dirty_bits
|
735 v3d
->prog
.fs
->uniform_dirty_bits
)) {
736 v3d_emit_gl_shader_state(v3d
, info
);
741 /* The Base Vertex/Base Instance packet sets those values to nonzero
742 * for the next draw call only.
744 if (info
->index_bias
|| info
->start_instance
) {
745 cl_emit(&job
->bcl
, BASE_VERTEX_BASE_INSTANCE
, base
) {
746 base
.base_instance
= info
->start_instance
;
747 base
.base_vertex
= info
->index_bias
;
751 uint32_t prim_tf_enable
= 0;
753 /* V3D 3.x: The HW only processes transform feedback on primitives
756 if (v3d
->streamout
.num_targets
)
757 prim_tf_enable
= (V3D_PRIM_POINTS_TF
- V3D_PRIM_POINTS
);
760 v3d_tf_statistics_record(v3d
, info
, v3d
->streamout
.num_targets
);
762 /* Note that the primitive type fields match with OpenGL/gallium
763 * definitions, up to but not including QUADS.
765 if (info
->index_size
) {
766 uint32_t index_size
= info
->index_size
;
767 uint32_t offset
= info
->start
* index_size
;
768 struct pipe_resource
*prsc
;
769 if (info
->has_user_indices
) {
771 u_upload_data(v3d
->uploader
, 0,
772 info
->count
* info
->index_size
, 4,
776 prsc
= info
->index
.resource
;
778 struct v3d_resource
*rsc
= v3d_resource(prsc
);
780 #if V3D_VERSION >= 40
781 cl_emit(&job
->bcl
, INDEX_BUFFER_SETUP
, ib
) {
782 ib
.address
= cl_address(rsc
->bo
, 0);
783 ib
.size
= rsc
->bo
->size
;
787 if (info
->indirect
) {
788 cl_emit(&job
->bcl
, INDIRECT_INDEXED_INSTANCED_PRIM_LIST
, prim
) {
789 prim
.index_type
= ffs(info
->index_size
) - 1;
791 prim
.address_of_indices_list
=
792 cl_address(rsc
->bo
, offset
);
793 #endif /* V3D_VERSION < 40 */
794 prim
.mode
= info
->mode
| prim_tf_enable
;
795 prim
.enable_primitive_restarts
= info
->primitive_restart
;
797 prim
.number_of_draw_indirect_indexed_records
= info
->indirect
->draw_count
;
799 prim
.stride_in_multiples_of_4_bytes
= info
->indirect
->stride
>> 2;
800 prim
.address
= cl_address(v3d_resource(info
->indirect
->buffer
)->bo
,
801 info
->indirect
->offset
);
803 } else if (info
->instance_count
> 1) {
804 cl_emit(&job
->bcl
, INDEXED_INSTANCED_PRIM_LIST
, prim
) {
805 prim
.index_type
= ffs(info
->index_size
) - 1;
806 #if V3D_VERSION >= 40
807 prim
.index_offset
= offset
;
808 #else /* V3D_VERSION < 40 */
809 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
810 prim
.address_of_indices_list
=
811 cl_address(rsc
->bo
, offset
);
812 #endif /* V3D_VERSION < 40 */
813 prim
.mode
= info
->mode
| prim_tf_enable
;
814 prim
.enable_primitive_restarts
= info
->primitive_restart
;
816 prim
.number_of_instances
= info
->instance_count
;
817 prim
.instance_length
= info
->count
;
820 cl_emit(&job
->bcl
, INDEXED_PRIM_LIST
, prim
) {
821 prim
.index_type
= ffs(info
->index_size
) - 1;
822 prim
.length
= info
->count
;
823 #if V3D_VERSION >= 40
824 prim
.index_offset
= offset
;
825 #else /* V3D_VERSION < 40 */
826 prim
.maximum_index
= (1u << 31) - 1; /* XXX */
827 prim
.address_of_indices_list
=
828 cl_address(rsc
->bo
, offset
);
829 #endif /* V3D_VERSION < 40 */
830 prim
.mode
= info
->mode
| prim_tf_enable
;
831 prim
.enable_primitive_restarts
= info
->primitive_restart
;
835 job
->draw_calls_queued
++;
837 if (info
->has_user_indices
)
838 pipe_resource_reference(&prsc
, NULL
);
840 if (info
->indirect
) {
841 cl_emit(&job
->bcl
, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS
, prim
) {
842 prim
.mode
= info
->mode
| prim_tf_enable
;
843 prim
.number_of_draw_indirect_array_records
= info
->indirect
->draw_count
;
845 prim
.stride_in_multiples_of_4_bytes
= info
->indirect
->stride
>> 2;
846 prim
.address
= cl_address(v3d_resource(info
->indirect
->buffer
)->bo
,
847 info
->indirect
->offset
);
849 } else if (info
->instance_count
> 1) {
850 cl_emit(&job
->bcl
, VERTEX_ARRAY_INSTANCED_PRIMS
, prim
) {
851 prim
.mode
= info
->mode
| prim_tf_enable
;
852 prim
.index_of_first_vertex
= info
->start
;
853 prim
.number_of_instances
= info
->instance_count
;
854 prim
.instance_length
= info
->count
;
857 cl_emit(&job
->bcl
, VERTEX_ARRAY_PRIMS
, prim
) {
858 prim
.mode
= info
->mode
| prim_tf_enable
;
859 prim
.length
= info
->count
;
860 prim
.index_of_first_vertex
= info
->start
;
865 /* A flush is required in between a TF draw and any following TF specs
866 * packet, or the GPU may hang. Just flush each time for now.
868 if (v3d
->streamout
.num_targets
)
869 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT
, flush
);
871 job
->draw_calls_queued
++;
873 /* Increment the TF offsets by how many verts we wrote. XXX: This
874 * needs some clamping to the buffer size.
876 for (int i
= 0; i
< v3d
->streamout
.num_targets
; i
++)
877 v3d
->streamout
.offsets
[i
] += info
->count
;
879 if (v3d
->zsa
&& job
->zsbuf
&& v3d
->zsa
->base
.depth
.enabled
) {
880 struct v3d_resource
*rsc
= v3d_resource(job
->zsbuf
->texture
);
881 v3d_job_add_bo(job
, rsc
->bo
);
883 job
->load
|= PIPE_CLEAR_DEPTH
& ~job
->clear
;
884 if (v3d
->zsa
->base
.depth
.writemask
)
885 job
->store
|= PIPE_CLEAR_DEPTH
;
886 rsc
->initialized_buffers
= PIPE_CLEAR_DEPTH
;
889 if (v3d
->zsa
&& job
->zsbuf
&& v3d
->zsa
->base
.stencil
[0].enabled
) {
890 struct v3d_resource
*rsc
= v3d_resource(job
->zsbuf
->texture
);
891 if (rsc
->separate_stencil
)
892 rsc
= rsc
->separate_stencil
;
894 v3d_job_add_bo(job
, rsc
->bo
);
896 job
->load
|= PIPE_CLEAR_STENCIL
& ~job
->clear
;
897 if (v3d
->zsa
->base
.stencil
[0].writemask
||
898 v3d
->zsa
->base
.stencil
[1].writemask
) {
899 job
->store
|= PIPE_CLEAR_STENCIL
;
901 rsc
->initialized_buffers
|= PIPE_CLEAR_STENCIL
;
904 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
905 uint32_t bit
= PIPE_CLEAR_COLOR0
<< i
;
906 int blend_rt
= v3d
->blend
->base
.independent_blend_enable
? i
: 0;
908 if (job
->store
& bit
|| !job
->cbufs
[i
])
910 struct v3d_resource
*rsc
= v3d_resource(job
->cbufs
[i
]->texture
);
912 job
->load
|= bit
& ~job
->clear
;
913 if (v3d
->blend
->base
.rt
[blend_rt
].colormask
)
915 v3d_job_add_bo(job
, rsc
->bo
);
918 if (job
->referenced_size
> 768 * 1024 * 1024) {
919 perf_debug("Flushing job with %dkb to try to free up memory\n",
920 job
->referenced_size
/ 1024);
924 if (V3D_DEBUG
& V3D_DEBUG_ALWAYS_FLUSH
)
929 * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
932 v3d_draw_clear(struct v3d_context
*v3d
,
934 const union pipe_color_union
*color
,
935 double depth
, unsigned stencil
)
937 static const union pipe_color_union dummy_color
= {};
939 /* The blitter util dereferences the color regardless, even though the
940 * gallium clear API may not pass one in when only Z/S are cleared.
943 color
= &dummy_color
;
945 v3d_blitter_save(v3d
);
946 util_blitter_clear(v3d
->blitter
,
947 v3d
->framebuffer
.width
,
948 v3d
->framebuffer
.height
,
949 util_framebuffer_get_num_layers(&v3d
->framebuffer
),
950 buffers
, color
, depth
, stencil
);
954 * Attempts to perform the GL clear by using the TLB's fast clear at the start
958 v3d_tlb_clear(struct v3d_job
*job
, unsigned buffers
,
959 const union pipe_color_union
*color
,
960 double depth
, unsigned stencil
)
962 struct v3d_context
*v3d
= job
->v3d
;
964 if (job
->draw_calls_queued
) {
965 /* If anything in the CL has drawn using the buffer, then the
966 * TLB clear we're trying to add now would happen before that
969 buffers
&= ~(job
->load
| job
->store
);
972 /* GFXH-1461: If we were to emit a load of just depth or just stencil,
973 * then the clear for the other may get lost. We need to decide now
974 * if it would be possible to need to emit a load of just one after
975 * we've set up our TLB clears.
977 if (buffers
& PIPE_CLEAR_DEPTHSTENCIL
&&
978 (buffers
& PIPE_CLEAR_DEPTHSTENCIL
) != PIPE_CLEAR_DEPTHSTENCIL
&&
980 util_format_is_depth_and_stencil(job
->zsbuf
->texture
->format
)) {
981 buffers
&= ~PIPE_CLEAR_DEPTHSTENCIL
;
984 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
985 uint32_t bit
= PIPE_CLEAR_COLOR0
<< i
;
986 if (!(buffers
& bit
))
989 struct pipe_surface
*psurf
= v3d
->framebuffer
.cbufs
[i
];
990 struct v3d_surface
*surf
= v3d_surface(psurf
);
991 struct v3d_resource
*rsc
= v3d_resource(psurf
->texture
);
994 uint32_t internal_size
= 4 << surf
->internal_bpp
;
996 static union pipe_color_union swapped_color
;
997 if (v3d
->swap_color_rb
& (1 << i
)) {
998 swapped_color
.f
[0] = color
->f
[2];
999 swapped_color
.f
[1] = color
->f
[1];
1000 swapped_color
.f
[2] = color
->f
[0];
1001 swapped_color
.f
[3] = color
->f
[3];
1002 color
= &swapped_color
;
1005 switch (surf
->internal_type
) {
1006 case V3D_INTERNAL_TYPE_8
:
1007 util_pack_color(color
->f
, PIPE_FORMAT_R8G8B8A8_UNORM
,
1009 memcpy(job
->clear_color
[i
], uc
.ui
, internal_size
);
1011 case V3D_INTERNAL_TYPE_8I
:
1012 case V3D_INTERNAL_TYPE_8UI
:
1013 job
->clear_color
[i
][0] = ((color
->ui
[0] & 0xff) |
1014 (color
->ui
[1] & 0xff) << 8 |
1015 (color
->ui
[2] & 0xff) << 16 |
1016 (color
->ui
[3] & 0xff) << 24);
1018 case V3D_INTERNAL_TYPE_16F
:
1019 util_pack_color(color
->f
, PIPE_FORMAT_R16G16B16A16_FLOAT
,
1021 memcpy(job
->clear_color
[i
], uc
.ui
, internal_size
);
1023 case V3D_INTERNAL_TYPE_16I
:
1024 case V3D_INTERNAL_TYPE_16UI
:
1025 job
->clear_color
[i
][0] = ((color
->ui
[0] & 0xffff) |
1026 color
->ui
[1] << 16);
1027 job
->clear_color
[i
][1] = ((color
->ui
[2] & 0xffff) |
1028 color
->ui
[3] << 16);
1030 case V3D_INTERNAL_TYPE_32F
:
1031 case V3D_INTERNAL_TYPE_32I
:
1032 case V3D_INTERNAL_TYPE_32UI
:
1033 memcpy(job
->clear_color
[i
], color
->ui
, internal_size
);
1037 rsc
->initialized_buffers
|= bit
;
1040 unsigned zsclear
= buffers
& PIPE_CLEAR_DEPTHSTENCIL
;
1042 struct v3d_resource
*rsc
=
1043 v3d_resource(v3d
->framebuffer
.zsbuf
->texture
);
1045 if (zsclear
& PIPE_CLEAR_DEPTH
)
1046 job
->clear_z
= depth
;
1047 if (zsclear
& PIPE_CLEAR_STENCIL
)
1048 job
->clear_s
= stencil
;
1050 rsc
->initialized_buffers
|= zsclear
;
1053 job
->draw_min_x
= 0;
1054 job
->draw_min_y
= 0;
1055 job
->draw_max_x
= v3d
->framebuffer
.width
;
1056 job
->draw_max_y
= v3d
->framebuffer
.height
;
1057 job
->clear
|= buffers
;
1058 job
->store
|= buffers
;
1060 v3d_start_draw(v3d
);
/**
 * Gallium clear() hook: clears as many of the requested buffers as possible
 * with the TLB fast clear and draws the remainder.
 */
static void
v3d_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_job *job = v3d_get_job_for_fbo(v3d);

        const unsigned tlb_cleared =
                v3d_tlb_clear(job, buffers, color, depth, stencil);
        const unsigned remaining = buffers & ~tlb_cleared;

        /* Everything was handled by the fast clear. */
        if (!remaining)
                return;

        v3d_draw_clear(v3d, remaining, color, depth, stencil);
}
/**
 * Gallium clear_render_target() hook.  Not implemented: it only reports
 * that on stderr and ignores every argument.
 */
static void
v3d_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear RT\n", stderr);
}
/**
 * Gallium clear_depth_stencil() hook.  Not implemented: it only reports
 * that on stderr and ignores every argument.
 */
static void
v3d_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear DS\n", stderr);
}
1097 v3dX(draw_init
)(struct pipe_context
*pctx
)
1099 pctx
->draw_vbo
= v3d_draw_vbo
;
1100 pctx
->clear
= v3d_clear
;
1101 pctx
->clear_render_target
= v3d_clear_render_target
;
1102 pctx
->clear_depth_stencil
= v3d_clear_depth_stencil
;