+ if (SI_PRIM_DISCARD_DEBUG)
+ printf("PD failed: %s\n", s);
+ return false;
+}
+
+static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ struct pipe_resource *indexbuf = info->index.resource;
+ unsigned dirty_tex_counter, dirty_buf_counter;
+ enum pipe_prim_type rast_prim, prim = info->mode;
+ unsigned index_size = info->index_size;
+ unsigned index_offset = info->indirect ? info->start * index_size : 0;
+ unsigned instance_count = info->instance_count;
+ bool primitive_restart =
+ info->primitive_restart &&
+ (!sctx->screen->options.prim_restart_tri_strips_only ||
+ (prim != PIPE_PRIM_TRIANGLE_STRIP && prim != PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
+
+ if (likely(!info->indirect)) {
+ /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
+ * no workaround for indirect draws, but we can at least skip
+ * direct draws.
+ */
+ if (unlikely(!instance_count))
+ return;
+
+ /* Handle count == 0. */
+ if (unlikely(!info->count && (index_size || !info->count_from_stream_output)))
+ return;
+ }
+
+ struct si_shader_selector *vs = sctx->vs_shader.cso;
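+ /* Sanity checks: skip the draw (asserting in debug builds) if the bound
+ * shaders or vertex elements are inconsistent with this draw. */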
+ if (unlikely(!vs || sctx->num_vertex_elements < vs->num_vs_inputs ||
+ (!sctx->ps_shader.cso && !rs->rasterizer_discard) ||
+ (!!sctx->tes_shader.cso != (prim == PIPE_PRIM_PATCHES)))) {
+ assert(0);
+ return;
+ }
+
+ /* Recompute and re-emit the texture resource states if needed. */
+ dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
+ if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
+ sctx->last_dirty_tex_counter = dirty_tex_counter;
+ sctx->framebuffer.dirty_cbufs |= ((1 << sctx->framebuffer.state.nr_cbufs) - 1);
+ sctx->framebuffer.dirty_zsbuf = true;
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
+ si_update_all_texture_descriptors(sctx);
+ }
+
+ dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
+ if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
+ sctx->last_dirty_buf_counter = dirty_buf_counter;
+ /* Rebind all buffers unconditionally. */
+ si_rebind_buffer(sctx, NULL);
+ }
+
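+ /* Decompress resources bound to any graphics shader stage, if needed. */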
+ si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
+
+ /* Set the rasterization primitive type.
+ *
+ * This must be done after si_decompress_textures, which can call
+ * draw_vbo recursively, and before si_update_shaders, which uses
+ * current_rast_prim for this draw_vbo call. */
+ if (sctx->gs_shader.cso) {
+ /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
+ rast_prim = sctx->gs_shader.cso->rast_prim;
+ } else if (sctx->tes_shader.cso) {
+ /* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
+ rast_prim = sctx->tes_shader.cso->rast_prim;
+ } else if (util_rast_prim_is_triangles(prim)) {
+ rast_prim = PIPE_PRIM_TRIANGLES;
+ } else {
+ /* Only possibilities: POINTS, LINE*, RECTANGLES */
+ rast_prim = prim;
+ }
+
+ if (rast_prim != sctx->current_rast_prim) {
+ if (util_prim_is_points_or_lines(sctx->current_rast_prim) !=
+ util_prim_is_points_or_lines(rast_prim))
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband);
+
+ sctx->current_rast_prim = rast_prim;
+ sctx->do_update_shaders = true;
+ }
+
+ if (sctx->tes_shader.cso && sctx->screen->info.has_ls_vgpr_init_bug) {
+ /* Determine whether the LS VGPR fix should be applied.
+ *
+ * It is only required when num input CPs > num output CPs,
+ * which cannot happen with the fixed function TCS. We should
+ * also update this bit when switching from TCS to fixed
+ * function TCS.
+ */
+ struct si_shader_selector *tcs = sctx->tcs_shader.cso;
+ bool ls_vgpr_fix =
+ tcs && info->vertices_per_patch > tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+
+ if (ls_vgpr_fix != sctx->ls_vgpr_fix) {
+ sctx->ls_vgpr_fix = ls_vgpr_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
+ if (sctx->chip_class <= GFX9 && sctx->gs_shader.cso) {
+ /* Determine whether the GS triangle strip adjacency fix should
+ * be applied. Rotate every other triangle if
+ * - triangle strips with adjacency are fed to the GS and
+ * - primitive restart is disabled (the rotation doesn't help
+ * when the restart occurs after an odd number of triangles).
+ */
+ bool gs_tri_strip_adj_fix =
+ !sctx->tes_shader.cso && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY && !primitive_restart;
+
+ if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
+ sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
+ if (index_size) {
+ /* Translate or upload, if needed. */
+ /* 8-bit indices are supported on GFX8. */
+ if (sctx->chip_class <= GFX7 && index_size == 1) {
+ unsigned start, count, start_offset, size, offset;
+ void *ptr;
+
+ si_get_draw_start_count(sctx, info, &start, &count);
+ start_offset = start * 2;
+ size = count * 2;
+
+ indexbuf = NULL;
+ u_upload_alloc(ctx->stream_uploader, start_offset, size,
+ si_optimal_tcc_alignment(sctx, size), &offset, &indexbuf, &ptr);
+ if (!indexbuf)
+ return;
+
+ util_shorten_ubyte_elts_to_userptr(&sctx->b, info, 0, 0, index_offset + start, count, ptr);
+
+ /* info->start will be added by the drawing code */
+ index_offset = offset - start_offset;
+ index_size = 2;
+ } else if (info->has_user_indices) {
+ unsigned start_offset;
+
+ assert(!info->indirect);
+ start_offset = info->start * index_size;
+
+ indexbuf = NULL;
+ u_upload_data(ctx->stream_uploader, start_offset, info->count * index_size,
+ sctx->screen->info.tcc_cache_line_size,
+ (char *)info->index.user + start_offset, &index_offset, &indexbuf);
+ if (!indexbuf)
+ return;
+
+ /* info->start will be added by the drawing code */
+ index_offset -= start_offset;
+ } else if (sctx->chip_class <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) {
+ /* GFX8 reads index buffers through TC L2, so it doesn't
+ * need this. */
+ sctx->flags |= SI_CONTEXT_WB_L2;
+ si_resource(indexbuf)->TC_L2_dirty = false;
+ }
+ }
+
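+ /* Local state for the primitive discard compute shader fast path, decided below. */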
+ bool dispatch_prim_discard_cs = false;
+ bool prim_discard_cs_instancing = false;
+ unsigned original_index_size = index_size;
+ unsigned direct_count = 0;
+
+ if (info->indirect) {
+ struct pipe_draw_indirect_info *indirect = info->indirect;
+
+ /* Add the buffer size for memory checking in need_cs_space. */
+ si_context_add_resource_size(sctx, indirect->buffer);
+
+ /* Indirect buffers use TC L2 on GFX9, but not older hw. */
+ if (sctx->chip_class <= GFX8) {
+ if (si_resource(indirect->buffer)->TC_L2_dirty) {
+ sctx->flags |= SI_CONTEXT_WB_L2;
+ si_resource(indirect->buffer)->TC_L2_dirty = false;
+ }
+
+ if (indirect->indirect_draw_count &&
+ si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
+ sctx->flags |= SI_CONTEXT_WB_L2;
+ si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
+ }
+ }
+ } else {
+ /* Multiply by 3 for strips and fans to get an approximate vertex
+ * count as triangles. */
+ direct_count = info->count * instance_count * (prim == PIPE_PRIM_TRIANGLES ? 1 : 3);
+ }
+
+ /* Determine if we can use the primitive discard compute shader. */
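+ /* Each pd_msg() call below logs the reason (when debugging is enabled) and
+ * evaluates to false, so any failed check falls back to the normal draw path. */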
+ if (si_compute_prim_discard_enabled(sctx) &&
+ (direct_count > sctx->prim_discard_vertex_count_threshold
+ ? (sctx->compute_num_verts_rejected += direct_count, true) /* Add, then return true. */
+ : (sctx->compute_num_verts_ineligible += direct_count,
+ false)) && /* Add, then return false. */
+ (!info->count_from_stream_output || pd_msg("draw_opaque")) &&
+ (primitive_restart ?
+ /* Supported prim types with primitive restart: */
+ (prim == PIPE_PRIM_TRIANGLE_STRIP || pd_msg("bad prim type with primitive restart")) &&
+ /* Disallow instancing with primitive restart: */
+ (instance_count == 1 || pd_msg("instance_count > 1 with primitive restart"))
+ :
+ /* Supported prim types without primitive restart + allow instancing: */
+ (1 << prim) & ((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+ (1 << PIPE_PRIM_TRIANGLE_FAN)) &&
+ /* Instancing is limited to 16-bit indices, because InstanceID is packed into
+ VertexID. */
+ /* TODO: DrawArraysInstanced sometimes doesn't work, so it's disabled. */
+ (instance_count == 1 ||
+ (instance_count <= USHRT_MAX && index_size && index_size <= 2) ||
+ pd_msg("instance_count too large or index_size == 4 or DrawArraysInstanced"))) &&
+ (info->drawid == 0 || !sctx->vs_shader.cso->info.uses_drawid || pd_msg("draw_id > 0")) &&
+ (!sctx->render_cond || pd_msg("render condition")) &&
+ /* Forced enablement ignores pipeline statistics queries. */
+ (sctx->screen->debug_flags & (DBG(PD) | DBG(ALWAYS_PD)) ||
+ (!sctx->num_pipeline_stat_queries && !sctx->streamout.prims_gen_query_enabled) ||
+ pd_msg("pipestat or primgen query")) &&
+ (!sctx->vertex_elements->instance_divisor_is_fetched || pd_msg("loads instance divisors")) &&
+ (!sctx->tes_shader.cso || pd_msg("uses tess")) &&
+ (!sctx->gs_shader.cso || pd_msg("uses GS")) &&
+ (!sctx->ps_shader.cso->info.uses_primid || pd_msg("PS uses PrimID")) &&
+ !rs->polygon_mode_enabled &&
+#if SI_PRIM_DISCARD_DEBUG /* same as cso->prim_discard_cs_allowed */
+ (!sctx->vs_shader.cso->info.uses_bindless_images || pd_msg("uses bindless images")) &&
+ (!sctx->vs_shader.cso->info.uses_bindless_samplers || pd_msg("uses bindless samplers")) &&
+ (!sctx->vs_shader.cso->info.writes_memory || pd_msg("writes memory")) &&
+ (!sctx->vs_shader.cso->info.writes_viewport_index || pd_msg("writes viewport index")) &&
+ !sctx->vs_shader.cso->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
+ !sctx->vs_shader.cso->so.num_outputs &&
+#else
+ (sctx->vs_shader.cso->prim_discard_cs_allowed ||
+ pd_msg("VS shader uses unsupported features")) &&
+#endif
+ /* Check that all buffers are used read-only, because compute
+ * dispatches can run ahead. */
+ (si_all_vs_resources_read_only(sctx, index_size ? indexbuf : NULL) ||
+ pd_msg("write reference"))) {
+ switch (si_prepare_prim_discard_or_split_draw(sctx, info, primitive_restart)) {
+ case SI_PRIM_DISCARD_ENABLED:
+ original_index_size = index_size;
+ prim_discard_cs_instancing = instance_count > 1;
+ dispatch_prim_discard_cs = true;
+
+ /* The compute shader changes/lowers the following: */
+ prim = PIPE_PRIM_TRIANGLES;
+ index_size = 4;
+ instance_count = 1;
+ primitive_restart = false;
+ sctx->compute_num_verts_rejected -= direct_count;
+ sctx->compute_num_verts_accepted += direct_count;
+ break;
+ case SI_PRIM_DISCARD_DISABLED:
+ break;
+ case SI_PRIM_DISCARD_DRAW_SPLIT:
+ sctx->compute_num_verts_rejected -= direct_count;
+ goto return_cleanup;
+ }
+ }
+
+ if (prim_discard_cs_instancing != sctx->prim_discard_cs_instancing) {
+ sctx->prim_discard_cs_instancing = prim_discard_cs_instancing;
+ sctx->do_update_shaders = true;
+ }
+
+ /* Update NGG culling settings. */
+ if (sctx->ngg && !dispatch_prim_discard_cs && rast_prim == PIPE_PRIM_TRIANGLES &&
+ !sctx->gs_shader.cso && /* GS doesn't support NGG culling. */
+ (sctx->screen->always_use_ngg_culling_all ||
+ (sctx->tes_shader.cso && sctx->screen->always_use_ngg_culling_tess) ||
+ /* At least 1024 non-indexed vertices (8 subgroups) are needed
+ * per draw call (no TES/GS) to enable NGG culling.
+ */
+ (!index_size && direct_count >= 1024 &&
+ (prim == PIPE_PRIM_TRIANGLES || prim == PIPE_PRIM_TRIANGLE_STRIP) &&
+ !sctx->tes_shader.cso)) &&
+ si_get_vs(sctx)->cso->ngg_culling_allowed) {
+ unsigned ngg_culling = 0;
+
+ if (rs->rasterizer_discard) {
+ ngg_culling |= SI_NGG_CULL_FRONT_FACE | SI_NGG_CULL_BACK_FACE;
+ } else {
+ /* Polygon mode can't use view and small primitive culling,
+ * because it draws points or lines where the culling depends
+ * on the point or line width.
+ */
+ if (!rs->polygon_mode_enabled)
+ ngg_culling |= SI_NGG_CULL_VIEW_SMALLPRIMS;
+
+ if (sctx->viewports.y_inverted ? rs->cull_back : rs->cull_front)
+ ngg_culling |= SI_NGG_CULL_FRONT_FACE;
+ if (sctx->viewports.y_inverted ? rs->cull_front : rs->cull_back)
+ ngg_culling |= SI_NGG_CULL_BACK_FACE;
+ }
+
+ /* Use NGG fast launch for certain non-indexed primitive types.
+ * A draw must have at least 1 full primitive.
+ */
+ if (ngg_culling && !index_size && direct_count >= 3 && !sctx->tes_shader.cso &&
+ !sctx->gs_shader.cso) {
+ if (prim == PIPE_PRIM_TRIANGLES)
+ ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
+ else if (prim == PIPE_PRIM_TRIANGLE_STRIP)
+ ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP;
+ }
+
+ if (ngg_culling != sctx->ngg_culling) {
+ /* Insert a VGT_FLUSH when the fast launch state changes, to prevent hangs.
+ * See issues #2418, #2426, #2434
+ */
+ if (ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
+ sctx->flags |= SI_CONTEXT_VGT_FLUSH;
+ sctx->ngg_culling = ngg_culling;
+ sctx->do_update_shaders = true;
+ }
+ } else if (sctx->ngg_culling) {
+ sctx->ngg_culling = false;
+ sctx->do_update_shaders = true;
+ }
+
+ if (sctx->do_update_shaders && !si_update_shaders(sctx))
+ goto return_cleanup;
+
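+ /* Make sure there is enough command stream space for this draw; this may flush the CS. */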
+ si_need_gfx_cs_space(sctx);
+
+ /* If we're using a secure context, determine whether the CS must be secure or not. */
+ if (unlikely(sctx->ws->ws_is_secure(sctx->ws))) {
+ bool secure = si_gfx_resources_check_encrypted(sctx);
+ if (secure != sctx->ws->cs_is_secure(sctx->gfx_cs)) {
+ si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
+ sctx->ws->cs_set_secure(sctx->gfx_cs, secure);
+ }
+ }
+
+ if (sctx->bo_list_add_all_gfx_resources)
+ si_gfx_resources_add_all_to_bo_list(sctx);
+
+ /* Since we've called si_context_add_resource_size for vertex buffers,
+ * this must be called after si_need_gfx_cs_space, because we must let
+ * it flush before we add buffers to the buffer list.
+ */
+ if (!si_upload_vertex_buffer_descriptors(sctx))
+ goto return_cleanup;
+
+ /* Vega10/Raven scissor bug workaround. When any context register is
+ * written (i.e. the GPU rolls the context), PA_SC_VPORT_SCISSOR
+ * registers must be written too.
+ */
+ unsigned masked_atoms = 0;
+
+ if (sctx->screen->info.has_gfx9_scissor_bug) {
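+ /* Mask the scissors atom here; it's emitted last, once we know whether the context rolled. */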
+ masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
+
+ if (info->count_from_stream_output ||
+ sctx->dirty_atoms & si_atoms_that_always_roll_context() ||
+ sctx->dirty_states & si_states_that_always_roll_context())
+ sctx->context_roll = true;
+ }
+
+ /* Use optimal packet order based on whether we need to sync the pipeline. */
+ if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH))) {
+ /* If we have to wait for idle, set all states first, so that all
+ * SET packets are processed in parallel with previous draw calls.
+ * Then draw and prefetch at the end. This ensures that the time
+ * the CUs are idle is very short.
+ */
+ if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
+ masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
+
+ if (!si_upload_graphics_shader_descriptors(sctx))
+ goto return_cleanup;
+
+ /* Emit all states except possibly render condition. */
+ si_emit_all_states(sctx, info, prim, instance_count, primitive_restart, masked_atoms);
+ sctx->emit_cache_flush(sctx);
+ /* <-- CUs are idle here. */
+
+ if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
+ sctx->atoms.s.render_cond.emit(sctx);
+
+ if (sctx->screen->info.has_gfx9_scissor_bug &&
+ (sctx->context_roll || si_is_atom_dirty(sctx, &sctx->atoms.s.scissors)))
+ sctx->atoms.s.scissors.emit(sctx);
+
+ sctx->dirty_atoms = 0;
+
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset, instance_count,
+ dispatch_prim_discard_cs, original_index_size);
+ /* <-- CUs are busy here. */
+
+ /* Start prefetches after the draw has been started. Both will run
+ * in parallel, but starting the draw first is more important.
+ */
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx, false);
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and draw at the end.
+ */
+ if (sctx->flags)
+ sctx->emit_cache_flush(sctx);
+
+ /* Only prefetch the API VS and VBO descriptors. */
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx, true);
+
+ if (!si_upload_graphics_shader_descriptors(sctx))
+ goto return_cleanup;
+
+ si_emit_all_states(sctx, info, prim, instance_count, primitive_restart, masked_atoms);
+
+ if (sctx->screen->info.has_gfx9_scissor_bug &&
+ (sctx->context_roll || si_is_atom_dirty(sctx, &sctx->atoms.s.scissors)))
+ sctx->atoms.s.scissors.emit(sctx);
+
+ sctx->dirty_atoms = 0;
+
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset, instance_count,
+ dispatch_prim_discard_cs, original_index_size);
+
+ /* Prefetch the remaining shaders after the draw has been
+ * started. */
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx, false);
+ }
+
+ /* Mark the displayable DCC buffer as dirty in order to update
+ * it on the next call to si_flush_resource. */
+ if (sctx->screen->info.use_display_dcc_with_retile_blit) {
+ /* Don't use si_update_fb_dirtiness_after_rendering because it'll
+ * cause unnecessary texture decompressions on each draw. */
+ unsigned displayable_dcc_cb_mask = sctx->framebuffer.displayable_dcc_cb_mask;
+ while (displayable_dcc_cb_mask) {
+ unsigned i = u_bit_scan(&displayable_dcc_cb_mask);
+ struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
+ struct si_texture *tex = (struct si_texture *)surf->texture;
+ tex->displayable_dcc_dirty = true;
+ }
+ }
+
+ /* Clear the context roll flag after the draw call. */
+ sctx->context_roll = false;
+
+ if (unlikely(sctx->current_saved_cs)) {
+ si_trace_emit(sctx);
+ si_log_draw_state(sctx, sctx->log);
+ }
+
+ /* Workaround for a VGT hang when streamout is enabled.
+ * It must be done after drawing. */
+ if ((sctx->family == CHIP_HAWAII || sctx->family == CHIP_TONGA || sctx->family == CHIP_FIJI) &&
+ si_get_strmout_en(sctx)) {
+ sctx->flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
+ }
+
+ if (unlikely(sctx->decompression_enabled)) {
+ sctx->num_decompress_calls++;
+ } else {
+ sctx->num_draw_calls++;
+ if (sctx->framebuffer.state.nr_cbufs > 1)
+ sctx->num_mrt_draw_calls++;
+ if (primitive_restart)
+ sctx->num_prim_restart_calls++;
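+ /* A non-zero scratch wave size means a bound shader spills registers to scratch memory. */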
+ if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
+ sctx->num_spill_draw_calls++;
+ }
+
+return_cleanup:
+ if (index_size && indexbuf != info->index.resource)
+ pipe_resource_reference(&indexbuf, NULL);
+}
+
+static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elements_cso,
+ blitter_get_vs_func get_vs, int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances, enum blitter_attrib_type type,
+ const union blitter_attrib *attrib)
+{
+ struct pipe_context *pipe = util_blitter_get_pipe(blitter);
+ struct si_context *sctx = (struct si_context *)pipe;
+
+ /* Pack position coordinates as signed int16. */
+ sctx->vs_blit_sh_data[0] = (uint32_t)(x1 & 0xffff) | ((uint32_t)(y1 & 0xffff) << 16);
+ sctx->vs_blit_sh_data[1] = (uint32_t)(x2 & 0xffff) | ((uint32_t)(y2 & 0xffff) << 16);
+ sctx->vs_blit_sh_data[2] = fui(depth);
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ memcpy(&sctx->vs_blit_sh_data[3], attrib->color, sizeof(float) * 4);
+ break;
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+ memcpy(&sctx->vs_blit_sh_data[3], &attrib->texcoord, sizeof(attrib->texcoord));
+ break;
+ case UTIL_BLITTER_ATTRIB_NONE:;
+ }
+
+ pipe->bind_vs_state(pipe, si_get_blitter_vs(sctx, type, num_instances));
+
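+ /* A rectangle is drawn as one 3-vertex RECTANGLE_LIST primitive. */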
+ struct pipe_draw_info info = {};
+ info.mode = SI_PRIM_RECTANGLE_LIST;
+ info.count = 3;
+ info.instance_count = num_instances;
+
+ /* Don't set per-stage shader pointers for VS. */
+ sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX);
+ sctx->vertex_buffer_pointer_dirty = false;
+ sctx->vertex_buffer_user_sgprs_dirty = false;
+
+ si_draw_vbo(pipe, &info);