/*
 * Copyright (c) 2014 Scott Mansell
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "util/u_blitter.h"
26 #include "util/u_prim.h"
27 #include "util/u_format.h"
28 #include "util/u_pack_color.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
32 #include "vc4_context.h"
33 #include "vc4_resource.h"
35 #define VC4_HW_2116_COUNT 0x1ef0
38 vc4_get_draw_cl_space(struct vc4_job
*job
, int vert_count
)
40 /* The SW-5891 workaround may cause us to emit multiple shader recs
43 int num_draws
= DIV_ROUND_UP(vert_count
, 65535) + 1;
45 /* Binner gets our packet state -- vc4_emit.c contents,
46 * and the primitive itself.
48 cl_ensure_space(&job
->bcl
,
49 256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE
+
50 VC4_PACKET_GL_SHADER_STATE_SIZE
) * num_draws
);
52 /* Nothing for rcl -- that's covered by vc4_context.c */
54 /* shader_rec gets up to 12 dwords of reloc handles plus a maximally
55 * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
58 cl_ensure_space(&job
->shader_rec
,
59 (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws
);
61 /* Uniforms are covered by vc4_write_uniforms(). */
63 /* There could be up to 16 textures per stage, plus misc other
66 cl_ensure_space(&job
->bo_handles
, (2 * 16 + 20) * sizeof(uint32_t));
67 cl_ensure_space(&job
->bo_pointers
,
68 (2 * 16 + 20) * sizeof(struct vc4_bo
*));
/*
 * Does the initial binning command list setup for drawing to a given FBO.
 */
75 vc4_start_draw(struct vc4_context
*vc4
)
77 struct vc4_job
*job
= vc4
->job
;
82 vc4_get_draw_cl_space(job
, 0);
84 struct vc4_cl_out
*bcl
= cl_start(&job
->bcl
);
85 // Tile state data is 48 bytes per tile, I think it can be thrown away
86 // as soon as binning is finished.
87 cl_u8(&bcl
, VC4_PACKET_TILE_BINNING_MODE_CONFIG
);
88 cl_u32(&bcl
, 0); /* tile alloc addr, filled by kernel */
89 cl_u32(&bcl
, 0); /* tile alloc size, filled by kernel */
90 cl_u32(&bcl
, 0); /* tile state addr, filled by kernel */
91 cl_u8(&bcl
, job
->draw_tiles_x
);
92 cl_u8(&bcl
, job
->draw_tiles_y
);
93 /* Other flags are filled by kernel. */
94 cl_u8(&bcl
, job
->msaa
? VC4_BIN_CONFIG_MS_MODE_4X
: 0);
96 /* START_TILE_BINNING resets the statechange counters in the hardware,
97 * which are what is used when a primitive is binned to a tile to
98 * figure out what new state packets need to be written to that tile's
101 cl_u8(&bcl
, VC4_PACKET_START_TILE_BINNING
);
103 /* Reset the current compressed primitives format. This gets modified
104 * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
105 * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
108 cl_u8(&bcl
, VC4_PACKET_PRIMITIVE_LIST_FORMAT
);
109 cl_u8(&bcl
, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX
|
110 VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES
));
112 job
->needs_flush
= true;
113 job
->draw_width
= vc4
->framebuffer
.width
;
114 job
->draw_height
= vc4
->framebuffer
.height
;
116 cl_end(&job
->bcl
, bcl
);
120 vc4_predraw_check_textures(struct pipe_context
*pctx
,
121 struct vc4_texture_stateobj
*stage_tex
)
123 struct vc4_context
*vc4
= vc4_context(pctx
);
125 for (int i
= 0; i
< stage_tex
->num_textures
; i
++) {
126 struct pipe_sampler_view
*view
= stage_tex
->textures
[i
];
129 struct vc4_resource
*rsc
= vc4_resource(view
->texture
);
130 if (rsc
->shadow_parent
)
131 vc4_update_shadow_baselevel_texture(pctx
, view
);
133 vc4_flush_jobs_writing_resource(vc4
, view
->texture
);
138 vc4_emit_gl_shader_state(struct vc4_context
*vc4
,
139 const struct pipe_draw_info
*info
,
140 uint32_t extra_index_bias
)
142 struct vc4_job
*job
= vc4
->job
;
143 /* VC4_DIRTY_VTXSTATE */
144 struct vc4_vertex_stateobj
*vtx
= vc4
->vtx
;
145 /* VC4_DIRTY_VTXBUF */
146 struct vc4_vertexbuf_stateobj
*vertexbuf
= &vc4
->vertexbuf
;
148 /* The simulator throws a fit if VS or CS don't read an attribute, so
149 * we emit a dummy read.
151 uint32_t num_elements_emit
= MAX2(vtx
->num_elements
, 1);
152 /* Emit the shader record. */
153 struct vc4_cl_out
*shader_rec
=
154 cl_start_shader_reloc(&job
->shader_rec
, 3 + num_elements_emit
);
155 /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
157 VC4_SHADER_FLAG_ENABLE_CLIPPING
|
158 VC4_SHADER_FLAG_FS_SINGLE_THREAD
|
159 ((info
->mode
== PIPE_PRIM_POINTS
&&
160 vc4
->rasterizer
->base
.point_size_per_vertex
) ?
161 VC4_SHADER_FLAG_VS_POINT_SIZE
: 0));
163 /* VC4_DIRTY_COMPILED_FS */
164 cl_u8(&shader_rec
, 0); /* fs num uniforms (unused) */
165 cl_u8(&shader_rec
, vc4
->prog
.fs
->num_inputs
);
166 cl_reloc(job
, &job
->shader_rec
, &shader_rec
, vc4
->prog
.fs
->bo
, 0);
167 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
169 /* VC4_DIRTY_COMPILED_VS */
170 cl_u16(&shader_rec
, 0); /* vs num uniforms */
171 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattrs_live
);
172 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattr_offsets
[8]);
173 cl_reloc(job
, &job
->shader_rec
, &shader_rec
, vc4
->prog
.vs
->bo
, 0);
174 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
176 /* VC4_DIRTY_COMPILED_CS */
177 cl_u16(&shader_rec
, 0); /* cs num uniforms */
178 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattrs_live
);
179 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattr_offsets
[8]);
180 cl_reloc(job
, &job
->shader_rec
, &shader_rec
, vc4
->prog
.cs
->bo
, 0);
181 cl_u32(&shader_rec
, 0); /* UBO offset written by kernel */
183 uint32_t max_index
= 0xffff;
184 for (int i
= 0; i
< vtx
->num_elements
; i
++) {
185 struct pipe_vertex_element
*elem
= &vtx
->pipe
[i
];
186 struct pipe_vertex_buffer
*vb
=
187 &vertexbuf
->vb
[elem
->vertex_buffer_index
];
188 struct vc4_resource
*rsc
= vc4_resource(vb
->buffer
);
189 /* not vc4->dirty tracked: vc4->last_index_bias */
190 uint32_t offset
= (vb
->buffer_offset
+
192 vb
->stride
* (info
->index_bias
+
194 uint32_t vb_size
= rsc
->bo
->size
- offset
;
196 util_format_get_blocksize(elem
->src_format
);
198 cl_reloc(job
, &job
->shader_rec
, &shader_rec
, rsc
->bo
, offset
);
199 cl_u8(&shader_rec
, elem_size
- 1);
200 cl_u8(&shader_rec
, vb
->stride
);
201 cl_u8(&shader_rec
, vc4
->prog
.vs
->vattr_offsets
[i
]);
202 cl_u8(&shader_rec
, vc4
->prog
.cs
->vattr_offsets
[i
]);
204 if (vb
->stride
> 0) {
205 max_index
= MIN2(max_index
,
206 (vb_size
- elem_size
) / vb
->stride
);
210 if (vtx
->num_elements
== 0) {
211 assert(num_elements_emit
== 1);
212 struct vc4_bo
*bo
= vc4_bo_alloc(vc4
->screen
, 4096, "scratch VBO");
213 cl_reloc(job
, &job
->shader_rec
, &shader_rec
, bo
, 0);
214 cl_u8(&shader_rec
, 16 - 1); /* element size */
215 cl_u8(&shader_rec
, 0); /* stride */
216 cl_u8(&shader_rec
, 0); /* VS VPM offset */
217 cl_u8(&shader_rec
, 0); /* CS VPM offset */
218 vc4_bo_unreference(&bo
);
220 cl_end(&job
->shader_rec
, shader_rec
);
222 struct vc4_cl_out
*bcl
= cl_start(&job
->bcl
);
223 /* the actual draw call. */
224 cl_u8(&bcl
, VC4_PACKET_GL_SHADER_STATE
);
225 assert(vtx
->num_elements
<= 8);
226 /* Note that number of attributes == 0 in the packet means 8
227 * attributes. This field also contains the offset into shader_rec.
229 cl_u32(&bcl
, num_elements_emit
& 0x7);
230 cl_end(&job
->bcl
, bcl
);
232 vc4_write_uniforms(vc4
, vc4
->prog
.fs
,
233 &vc4
->constbuf
[PIPE_SHADER_FRAGMENT
],
235 vc4_write_uniforms(vc4
, vc4
->prog
.vs
,
236 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
238 vc4_write_uniforms(vc4
, vc4
->prog
.cs
,
239 &vc4
->constbuf
[PIPE_SHADER_VERTEX
],
242 vc4
->last_index_bias
= info
->index_bias
+ extra_index_bias
;
243 vc4
->max_index
= max_index
;
244 job
->shader_rec_count
++;
/*
 * HW-2116 workaround: Flush the batch before triggering the hardware state
 * counter wraparound behavior.
 *
 * State updates are tracked by a global counter which increments at the first
 * state update after a draw or a START_BINNING.  Tiles can then have their
 * state updated at draw time with a set of cheap checks for whether the
 * state's copy of the global counter matches the global counter the last time
 * that state was written to the tile.
 *
 * The state counters are relatively small and wrap around quickly, so you
 * could get false negatives for needing to update a particular state in the
 * tile.  To avoid this, the hardware attempts to write all of the state in
 * the tile at wraparound time.  This apparently is broken, so we just flush
 * everything before that behavior is triggered.  A batch flush is sufficient
 * to get our current contents drawn and reset the counters to 0.
 *
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
 */
268 vc4_hw_2116_workaround(struct pipe_context
*pctx
, int vert_count
)
270 struct vc4_context
*vc4
= vc4_context(pctx
);
271 struct vc4_job
*job
= vc4_get_job_for_fbo(vc4
);
273 if (job
->draw_calls_queued
+ vert_count
/ 65535 >= VC4_HW_2116_COUNT
) {
274 perf_debug("Flushing batch due to HW-2116 workaround "
275 "(too many draw calls per scene\n");
276 vc4_job_submit(vc4
, job
);
281 vc4_draw_vbo(struct pipe_context
*pctx
, const struct pipe_draw_info
*info
)
283 struct vc4_context
*vc4
= vc4_context(pctx
);
285 if (info
->mode
>= PIPE_PRIM_QUADS
) {
286 util_primconvert_save_index_buffer(vc4
->primconvert
, &vc4
->indexbuf
);
287 util_primconvert_save_rasterizer_state(vc4
->primconvert
, &vc4
->rasterizer
->base
);
288 util_primconvert_draw_vbo(vc4
->primconvert
, info
);
289 perf_debug("Fallback conversion for %d %s vertices\n",
290 info
->count
, u_prim_name(info
->mode
));
294 /* Before setting up the draw, do any fixup blits necessary. */
295 vc4_predraw_check_textures(pctx
, &vc4
->verttex
);
296 vc4_predraw_check_textures(pctx
, &vc4
->fragtex
);
298 vc4_hw_2116_workaround(pctx
, info
->count
);
300 struct vc4_job
*job
= vc4_get_job_for_fbo(vc4
);
302 vc4_get_draw_cl_space(job
, info
->count
);
304 if (vc4
->prim_mode
!= info
->mode
) {
305 vc4
->prim_mode
= info
->mode
;
306 vc4
->dirty
|= VC4_DIRTY_PRIM_MODE
;
310 if (!vc4_update_compiled_shaders(vc4
, info
->mode
)) {
311 debug_warn_once("shader compile failed, skipping draw call.\n");
315 vc4_emit_state(pctx
);
317 if ((vc4
->dirty
& (VC4_DIRTY_VTXBUF
|
319 VC4_DIRTY_PRIM_MODE
|
320 VC4_DIRTY_RASTERIZER
|
321 VC4_DIRTY_COMPILED_CS
|
322 VC4_DIRTY_COMPILED_VS
|
323 VC4_DIRTY_COMPILED_FS
|
324 vc4
->prog
.cs
->uniform_dirty_bits
|
325 vc4
->prog
.vs
->uniform_dirty_bits
|
326 vc4
->prog
.fs
->uniform_dirty_bits
)) ||
327 vc4
->last_index_bias
!= info
->index_bias
) {
328 vc4_emit_gl_shader_state(vc4
, info
, 0);
333 /* Note that the primitive type fields match with OpenGL/gallium
334 * definitions, up to but not including QUADS.
336 struct vc4_cl_out
*bcl
= cl_start(&job
->bcl
);
338 uint32_t offset
= vc4
->indexbuf
.offset
;
339 uint32_t index_size
= vc4
->indexbuf
.index_size
;
340 struct pipe_resource
*prsc
;
341 if (vc4
->indexbuf
.index_size
== 4) {
342 prsc
= vc4_get_shadow_index_buffer(pctx
, &vc4
->indexbuf
,
343 info
->count
, &offset
);
346 if (vc4
->indexbuf
.user_buffer
) {
348 u_upload_data(vc4
->uploader
, 0,
349 info
->count
* index_size
, 4,
350 vc4
->indexbuf
.user_buffer
,
353 prsc
= vc4
->indexbuf
.buffer
;
356 struct vc4_resource
*rsc
= vc4_resource(prsc
);
358 cl_start_reloc(&job
->bcl
, &bcl
, 1);
359 cl_u8(&bcl
, VC4_PACKET_GL_INDEXED_PRIMITIVE
);
363 VC4_INDEX_BUFFER_U16
:
364 VC4_INDEX_BUFFER_U8
));
365 cl_u32(&bcl
, info
->count
);
366 cl_reloc(job
, &job
->bcl
, &bcl
, rsc
->bo
, offset
);
367 cl_u32(&bcl
, vc4
->max_index
);
368 job
->draw_calls_queued
++;
370 if (vc4
->indexbuf
.index_size
== 4 || vc4
->indexbuf
.user_buffer
)
371 pipe_resource_reference(&prsc
, NULL
);
373 uint32_t count
= info
->count
;
374 uint32_t start
= info
->start
;
375 uint32_t extra_index_bias
= 0;
378 uint32_t this_count
= count
;
379 uint32_t step
= count
;
380 static const uint32_t max_verts
= 65535;
382 /* GFXH-515 / SW-5891: The binner emits 16 bit indices
383 * for drawarrays, which means that if start + count >
384 * 64k it would truncate the top bits. Work around
385 * this by emitting a limited number of primitives at
386 * a time and reemitting the shader state pointing
387 * farther down the vertex attribute arrays.
389 * To do this properly for line loops or trifans, we'd
390 * need to make a new VB containing the first vertex
391 * plus whatever remainder.
393 if (extra_index_bias
) {
394 cl_end(&job
->bcl
, bcl
);
395 vc4_emit_gl_shader_state(vc4
, info
,
397 bcl
= cl_start(&job
->bcl
);
400 if (start
+ count
> max_verts
) {
401 switch (info
->mode
) {
402 case PIPE_PRIM_POINTS
:
403 this_count
= step
= max_verts
;
405 case PIPE_PRIM_LINES
:
406 this_count
= step
= max_verts
- (max_verts
% 2);
408 case PIPE_PRIM_LINE_STRIP
:
409 this_count
= max_verts
;
410 step
= max_verts
- 1;
412 case PIPE_PRIM_LINE_LOOP
:
413 this_count
= max_verts
;
414 step
= max_verts
- 1;
415 debug_warn_once("unhandled line loop "
416 "looping behavior with "
419 case PIPE_PRIM_TRIANGLES
:
420 this_count
= step
= max_verts
- (max_verts
% 3);
422 case PIPE_PRIM_TRIANGLE_STRIP
:
423 this_count
= max_verts
;
424 step
= max_verts
- 2;
427 debug_warn_once("unhandled primitive "
428 "max vert count, truncating\n");
429 this_count
= step
= max_verts
;
433 cl_u8(&bcl
, VC4_PACKET_GL_ARRAY_PRIMITIVE
);
434 cl_u8(&bcl
, info
->mode
);
435 cl_u32(&bcl
, this_count
);
437 job
->draw_calls_queued
++;
440 extra_index_bias
+= start
+ step
;
444 cl_end(&job
->bcl
, bcl
);
446 /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
449 assert(job
->draw_calls_queued
<= VC4_HW_2116_COUNT
);
451 if (vc4
->zsa
&& vc4
->framebuffer
.zsbuf
) {
452 struct vc4_resource
*rsc
=
453 vc4_resource(vc4
->framebuffer
.zsbuf
->texture
);
455 if (vc4
->zsa
->base
.depth
.enabled
) {
456 job
->resolve
|= PIPE_CLEAR_DEPTH
;
457 rsc
->initialized_buffers
= PIPE_CLEAR_DEPTH
;
460 if (vc4
->zsa
->base
.stencil
[0].enabled
) {
461 job
->resolve
|= PIPE_CLEAR_STENCIL
;
462 rsc
->initialized_buffers
|= PIPE_CLEAR_STENCIL
;
466 job
->resolve
|= PIPE_CLEAR_COLOR0
;
468 if (vc4_debug
& VC4_DEBUG_ALWAYS_FLUSH
)
473 pack_rgba(enum pipe_format format
, const float *rgba
)
476 util_pack_color(rgba
, format
, &uc
);
477 if (util_format_get_blocksize(format
) == 2)
484 vc4_clear(struct pipe_context
*pctx
, unsigned buffers
,
485 const union pipe_color_union
*color
, double depth
, unsigned stencil
)
487 struct vc4_context
*vc4
= vc4_context(pctx
);
488 struct vc4_job
*job
= vc4_get_job_for_fbo(vc4
);
490 /* We can't flag new buffers for clearing once we've queued draws. We
491 * could avoid this by using the 3d engine to clear.
493 if (job
->draw_calls_queued
) {
494 perf_debug("Flushing rendering to process new clear.\n");
495 vc4_job_submit(vc4
, job
);
496 job
= vc4_get_job_for_fbo(vc4
);
499 if (buffers
& PIPE_CLEAR_COLOR0
) {
500 struct vc4_resource
*rsc
=
501 vc4_resource(vc4
->framebuffer
.cbufs
[0]->texture
);
502 uint32_t clear_color
;
504 if (vc4_rt_format_is_565(vc4
->framebuffer
.cbufs
[0]->format
)) {
505 /* In 565 mode, the hardware will be packing our color
508 clear_color
= pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM
,
511 /* Otherwise, we need to do this packing because we
512 * support multiple swizzlings of RGBA8888.
515 pack_rgba(vc4
->framebuffer
.cbufs
[0]->format
,
518 job
->clear_color
[0] = job
->clear_color
[1] = clear_color
;
519 rsc
->initialized_buffers
|= (buffers
& PIPE_CLEAR_COLOR0
);
522 if (buffers
& PIPE_CLEAR_DEPTHSTENCIL
) {
523 struct vc4_resource
*rsc
=
524 vc4_resource(vc4
->framebuffer
.zsbuf
->texture
);
525 unsigned zsclear
= buffers
& PIPE_CLEAR_DEPTHSTENCIL
;
527 /* Clearing ZS will clear both Z and stencil, so if we're
528 * trying to clear just one then we need to draw a quad to do
531 if ((zsclear
== PIPE_CLEAR_DEPTH
||
532 zsclear
== PIPE_CLEAR_STENCIL
) &&
533 (rsc
->initialized_buffers
& ~(zsclear
| job
->cleared
)) &&
534 util_format_is_depth_and_stencil(vc4
->framebuffer
.zsbuf
->format
)) {
535 perf_debug("Partial clear of Z+stencil buffer, "
536 "drawing a quad instead of fast clearing\n");
537 vc4_blitter_save(vc4
);
538 util_blitter_clear(vc4
->blitter
,
539 vc4
->framebuffer
.width
,
540 vc4
->framebuffer
.height
,
543 NULL
, depth
, stencil
);
549 /* Though the depth buffer is stored with Z in the high 24,
550 * for this field we just need to store it in the low 24.
552 if (buffers
& PIPE_CLEAR_DEPTH
) {
553 job
->clear_depth
= util_pack_z(PIPE_FORMAT_Z24X8_UNORM
,
556 if (buffers
& PIPE_CLEAR_STENCIL
)
557 job
->clear_stencil
= stencil
;
559 rsc
->initialized_buffers
|= zsclear
;
564 job
->draw_max_x
= vc4
->framebuffer
.width
;
565 job
->draw_max_y
= vc4
->framebuffer
.height
;
566 job
->cleared
|= buffers
;
567 job
->resolve
|= buffers
;
/* pipe->clear_render_target() hook: not implemented for vc4. */
static void
vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fprintf(stderr, "unimpl: clear RT\n");
}
/* pipe->clear_depth_stencil() hook: not implemented for vc4. */
static void
vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fprintf(stderr, "unimpl: clear DS\n");
}
591 vc4_draw_init(struct pipe_context
*pctx
)
593 pctx
->draw_vbo
= vc4_draw_vbo
;
594 pctx
->clear
= vc4_clear
;
595 pctx
->clear_render_target
= vc4_clear_render_target
;
596 pctx
->clear_depth_stencil
= vc4_clear_depth_stencil
;