src/gallium/drivers/freedreno/freedreno_draw.c

   1 /*
   2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "pipe/p_state.h"
  28 #include "util/u_draw.h"
  29 #include "util/u_string.h"
  30 #include "util/u_memory.h"
  31 #include "util/u_prim.h"
  32 #include "util/u_format.h"
  33 #include "util/u_helpers.h"
  34
  35 #include "freedreno_blitter.h"
  36 #include "freedreno_draw.h"
  37 #include "freedreno_context.h"
  38 #include "freedreno_fence.h"
  39 #include "freedreno_state.h"
  40 #include "freedreno_resource.h"
  41 #include "freedreno_query_acc.h"
  42 #include "freedreno_query_hw.h"
  43 #include "freedreno_util.h"
  44
  45 static void
  46 resource_read(struct fd_batch *batch, struct pipe_resource *prsc)
  47 {
  48         if (!prsc)
  49                 return;
  50         fd_batch_resource_used(batch, fd_resource(prsc), false);
  51 }
  52
  53 static void
  54 resource_written(struct fd_batch *batch, struct pipe_resource *prsc)
  55 {
  56         if (!prsc)
  57                 return;
  58         fd_batch_resource_used(batch, fd_resource(prsc), true);
  59 }
  60
  61 static void
  62 fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
  63 {
  64         struct fd_context *ctx = fd_context(pctx);
  65         struct fd_batch *batch = fd_context_batch(ctx);
  66         struct pipe_framebuffer_state *pfb = &batch->framebuffer;
  67         unsigned i, prims, buffers = 0, restore_buffers = 0;
  68
  69         /* for debugging problems with indirect draw, it is convenient
  70          * to be able to emulate it, to determine if game is feeding us
  71          * bogus data:
  72          */
  73         if (info->indirect && (fd_mesa_debug & FD_DBG_NOINDR)) {
  74                 util_draw_indirect(pctx, info);
  75                 return;
  76         }
  77
  78         if (!info->count_from_stream_output && !info->indirect &&
  79             !info->primitive_restart &&
  80             !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
  81                 return;
  82
  83         /* TODO: push down the region versions into the tiles */
  84         if (!fd_render_condition_check(pctx))
  85                 return;
  86
  87         /* emulate unsupported primitives: */
  88         if (!fd_supported_prim(ctx, info->mode)) {
  89                 if (ctx->streamout.num_targets > 0)
  90                         debug_error("stream-out with emulated prims");
  91                 util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
  92                 util_primconvert_draw_vbo(ctx->primconvert, info);
  93                 return;
  94         }
  95
  96         fd_fence_ref(pctx->screen, &ctx->last_fence, NULL);
  97
  98         /* Upload a user index buffer. */
  99         struct pipe_resource *indexbuf = NULL;
 100         unsigned index_offset = 0;
 101         struct pipe_draw_info new_info;
 102         if (info->index_size) {
 103                 if (info->has_user_indices) {
 104                         if (!util_upload_index_buffer(pctx, info, &indexbuf, &index_offset))
 105                                 return;
 106                         new_info = *info;
 107                         new_info.index.resource = indexbuf;
 108                         new_info.has_user_indices = false;
 109                         info = &new_info;
 110                 } else {
 111                         indexbuf = info->index.resource;
 112                 }
 113         }
 114
 115         if (ctx->in_blit) {
 116                 fd_batch_reset(batch);
 117                 fd_context_all_dirty(ctx);
 118         }
 119
 120         batch->blit = ctx->in_blit;
 121         batch->back_blit = ctx->in_shadow;
 122
 123         /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
 124          * query_buf may not be created yet.
 125          */
 126         fd_batch_set_stage(batch, FD_STAGE_DRAW);
 127
 128         /*
 129          * Figure out the buffers/features we need:
 130          */
 131
 132         mtx_lock(&ctx->screen->lock);
 133
 134         if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
 135                 if (fd_depth_enabled(ctx)) {
 136                         if (fd_resource(pfb->zsbuf->texture)->valid) {
 137                                 restore_buffers |= FD_BUFFER_DEPTH;
 138                         } else {
 139                                 batch->invalidated |= FD_BUFFER_DEPTH;
 140                         }
 141                         batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
 142                         if (fd_depth_write_enabled(ctx)) {
 143                                 buffers |= FD_BUFFER_DEPTH;
 144                                 resource_written(batch, pfb->zsbuf->texture);
 145                         } else {
 146                                 resource_read(batch, pfb->zsbuf->texture);
 147                         }
 148                 }
 149
 150                 if (fd_stencil_enabled(ctx)) {
 151                         if (fd_resource(pfb->zsbuf->texture)->valid) {
 152                                 restore_buffers |= FD_BUFFER_STENCIL;
 153                         } else {
 154                                 batch->invalidated |= FD_BUFFER_STENCIL;
 155                         }
 156                         batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
 157                         buffers |= FD_BUFFER_STENCIL;
 158                         resource_written(batch, pfb->zsbuf->texture);
 159                 }
 160         }
 161
 162         if (fd_logicop_enabled(ctx))
 163                 batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
 164
 165         for (i = 0; i < pfb->nr_cbufs; i++) {
 166                 struct pipe_resource *surf;
 167
 168                 if (!pfb->cbufs[i])
 169                         continue;
 170
 171                 surf = pfb->cbufs[i]->texture;
 172
 173                 if (fd_resource(surf)->valid) {
 174                         restore_buffers |= PIPE_CLEAR_COLOR0 << i;
 175                 } else {
 176                         batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
 177                 }
 178
 179                 buffers |= PIPE_CLEAR_COLOR0 << i;
 180
 181                 if (fd_blend_enabled(ctx, i))
 182                         batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;
 183
 184                 if (ctx->dirty & FD_DIRTY_FRAMEBUFFER)
 185                         resource_written(batch, pfb->cbufs[i]->texture);
 186         }
 187
 188         /* Mark SSBOs as being written.. we don't actually know which ones are
 189          * read vs written, so just assume the worst
 190          */
 191         if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
 192                 foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].enabled_mask)
 193                                 resource_written(batch, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].sb[i].buffer);
 194         }
 195
 196         if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
 197                 foreach_bit(i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
 198                         struct pipe_image_view *img =
 199                                         &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
 200                         if (img->access & PIPE_IMAGE_ACCESS_WRITE)
 201                                 resource_written(batch, img->resource);
 202                         else
 203                                 resource_read(batch, img->resource);
 204                 }
 205         }
 206
 207         if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_CONST) {
 208                 foreach_bit(i, ctx->constbuf[PIPE_SHADER_VERTEX].enabled_mask)
 209                         resource_read(batch, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
 210         }
 211
 212         if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_CONST) {
 213                 foreach_bit(i, ctx->constbuf[PIPE_SHADER_FRAGMENT].enabled_mask)
 214                         resource_read(batch, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
 215         }
 216
 217         /* Mark VBOs as being read */
 218         if (ctx->dirty & FD_DIRTY_VTXBUF) {
 219                 foreach_bit(i, ctx->vtx.vertexbuf.enabled_mask) {
 220                         assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
 221                         resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
 222                 }
 223         }
 224
 225         /* Mark index buffer as being read */
 226         resource_read(batch, indexbuf);
 227
 228         /* Mark indirect draw buffer as being read */
 229         if (info->indirect)
 230                 resource_read(batch, info->indirect->buffer);
 231
 232         /* Mark textures as being read */
 233         if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
 234                 foreach_bit(i, ctx->tex[PIPE_SHADER_VERTEX].valid_textures)
 235                         resource_read(batch, ctx->tex[PIPE_SHADER_VERTEX].textures[i]->texture);
 236         }
 237
 238         if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
 239                 foreach_bit(i, ctx->tex[PIPE_SHADER_FRAGMENT].valid_textures)
 240                         resource_read(batch, ctx->tex[PIPE_SHADER_FRAGMENT].textures[i]->texture);
 241         }
 242
 243         /* Mark streamout buffers as being written.. */
 244         if (ctx->dirty & FD_DIRTY_STREAMOUT) {
 245                 for (i = 0; i < ctx->streamout.num_targets; i++)
 246                         if (ctx->streamout.targets[i])
 247                                 resource_written(batch, ctx->streamout.targets[i]->buffer);
 248         }
 249
 250         resource_written(batch, batch->query_buf);
 251
 252         list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
 253                 resource_written(batch, aq->prsc);
 254
 255         mtx_unlock(&ctx->screen->lock);
 256
 257         batch->num_draws++;
 258
 259         prims = u_reduced_prims_for_vertices(info->mode, info->count);
 260
 261         ctx->stats.draw_calls++;
 262
 263         /* TODO prims_emitted should be clipped when the stream-out buffer is
 264          * not large enough.  See max_tf_vtx().. probably need to move that
 265          * into common code.  Although a bit more annoying since a2xx doesn't
 266          * use ir3 so no common way to get at the pipe_stream_output_info
 267          * which is needed for this calculation.
 268          */
 269         if (ctx->streamout.num_targets > 0)
 270                 ctx->stats.prims_emitted += prims;
 271         ctx->stats.prims_generated += prims;
 272
 273         /* any buffers that haven't been cleared yet, we need to restore: */
 274         batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
 275         /* and any buffers used, need to be resolved: */
 276         batch->resolve |= buffers;
 277
 278         DBG("%p: %x %ux%u num_draws=%u (%s/%s)", batch, buffers,
 279                 pfb->width, pfb->height, batch->num_draws,
 280                 util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 281                 util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 282
 283         if (ctx->draw_vbo(ctx, info, index_offset))
 284                 batch->needs_flush = true;
 285
 286         batch->num_vertices += info->count * info->instance_count;
 287
 288         for (i = 0; i < ctx->streamout.num_targets; i++)
 289                 ctx->streamout.offsets[i] += info->count;
 290
 291         if (fd_mesa_debug & FD_DBG_DDRAW)
 292                 fd_context_all_dirty(ctx);
 293
 294         fd_batch_check_size(batch);
 295
 296         if (info == &new_info)
 297                 pipe_resource_reference(&indexbuf, NULL);
 298 }
 299
 300 static void
 301 fd_clear(struct pipe_context *pctx, unsigned buffers,
 302                 const union pipe_color_union *color, double depth, unsigned stencil)
 303 {
 304         struct fd_context *ctx = fd_context(pctx);
 305         struct fd_batch *batch = fd_context_batch(ctx);
 306         struct pipe_framebuffer_state *pfb = &batch->framebuffer;
 307         unsigned cleared_buffers;
 308         int i;
 309
 310         /* TODO: push down the region versions into the tiles */
 311         if (!fd_render_condition_check(pctx))
 312                 return;
 313
 314         fd_fence_ref(pctx->screen, &ctx->last_fence, NULL);
 315
 316         if (ctx->in_blit) {
 317                 fd_batch_reset(batch);
 318                 fd_context_all_dirty(ctx);
 319         }
 320
 321         /* pctx->clear() is only for full-surface clears, so scissor is
 322          * equivalent to having GL_SCISSOR_TEST disabled:
 323          */
 324         batch->max_scissor.minx = 0;
 325         batch->max_scissor.miny = 0;
 326         batch->max_scissor.maxx = pfb->width;
 327         batch->max_scissor.maxy = pfb->height;
 328
 329         /* for bookkeeping about which buffers have been cleared (and thus
 330          * can fully or partially skip mem2gmem) we need to ignore buffers
 331          * that have already had a draw, in case apps do silly things like
 332          * clear after draw (ie. if you only clear the color buffer, but
 333          * something like alpha-test causes side effects from the draw in
 334          * the depth buffer, etc)
 335          */
 336         cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
 337         batch->cleared |= buffers;
 338         batch->invalidated |= cleared_buffers;
 339
 340         batch->resolve |= buffers;
 341         batch->needs_flush = true;
 342
 343         mtx_lock(&ctx->screen->lock);
 344
 345         if (buffers & PIPE_CLEAR_COLOR)
 346                 for (i = 0; i < pfb->nr_cbufs; i++)
 347                         if (buffers & (PIPE_CLEAR_COLOR0 << i))
 348                                 resource_written(batch, pfb->cbufs[i]->texture);
 349
 350         if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
 351                 resource_written(batch, pfb->zsbuf->texture);
 352                 batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
 353         }
 354
 355         resource_written(batch, batch->query_buf);
 356
 357         list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
 358                 resource_written(batch, aq->prsc);
 359
 360         mtx_unlock(&ctx->screen->lock);
 361
 362         DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
 363                 pfb->width, pfb->height, depth, stencil,
 364                 util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 365                 util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 366
 367         /* if per-gen backend doesn't implement ctx->clear() generic
 368          * blitter clear:
 369          */
 370         bool fallback = true;
 371
 372         if (ctx->clear) {
 373                 fd_batch_set_stage(batch, FD_STAGE_CLEAR);
 374
 375                 if (ctx->clear(ctx, buffers, color, depth, stencil)) {
 376                         if (fd_mesa_debug & FD_DBG_DCLEAR)
 377                                 fd_context_all_dirty(ctx);
 378
 379                         fallback = false;
 380                 }
 381         }
 382
 383         if (fallback) {
 384                 fd_blitter_clear(pctx, buffers, color, depth, stencil);
 385         }
 386
 387         fd_batch_check_size(batch);
 388 }
 389
 390 static void
 391 fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
 392                 const union pipe_color_union *color,
 393                 unsigned x, unsigned y, unsigned w, unsigned h,
 394                 bool render_condition_enabled)
 395 {
 396         DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
 397 }
 398
 399 static void
 400 fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
 401                 unsigned buffers, double depth, unsigned stencil,
 402                 unsigned x, unsigned y, unsigned w, unsigned h,
 403                 bool render_condition_enabled)
 404 {
 405         DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
 406                         buffers, depth, stencil, x, y, w, h);
 407 }
 408
 409 static void
 410 fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
 411 {
 412         struct fd_context *ctx = fd_context(pctx);
 413         struct fd_batch *batch, *save_batch = NULL;
 414         unsigned i;
 415
 416         batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
 417         fd_batch_reference(&save_batch, ctx->batch);
 418         fd_batch_reference(&ctx->batch, batch);
 419         fd_context_all_dirty(ctx);
 420
 421         mtx_lock(&ctx->screen->lock);
 422
 423         /* Mark SSBOs as being written.. we don't actually know which ones are
 424          * read vs written, so just assume the worst
 425          */
 426         foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask)
 427                 resource_written(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer);
 428
 429         foreach_bit(i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
 430                 struct pipe_image_view *img =
 431                         &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
 432                 if (img->access & PIPE_IMAGE_ACCESS_WRITE)
 433                         resource_written(batch, img->resource);
 434                 else
 435                         resource_read(batch, img->resource);
 436         }
 437
 438         /* UBO's are read */
 439         foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
 440                 resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
 441
 442         /* Mark textures as being read */
 443         foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
 444                 resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
 445
 446         /* For global buffers, we don't really know if read or written, so assume
 447          * the worst:
 448          */
 449         foreach_bit(i, ctx->global_bindings.enabled_mask)
 450                 resource_written(batch, ctx->global_bindings.buf[i]);
 451
 452         if (info->indirect)
 453                 resource_read(batch, info->indirect);
 454
 455         mtx_unlock(&ctx->screen->lock);
 456
 457         batch->needs_flush = true;
 458         ctx->launch_grid(ctx, info);
 459
 460         fd_batch_flush(batch, false);
 461
 462         fd_batch_reference(&ctx->batch, save_batch);
 463         fd_context_all_dirty(ctx);
 464         fd_batch_reference(&save_batch, NULL);
 465         fd_batch_reference(&batch, NULL);
 466 }
 467
 468 void
 469 fd_draw_init(struct pipe_context *pctx)
 470 {
 471         pctx->draw_vbo = fd_draw_vbo;
 472         pctx->clear = fd_clear;
 473         pctx->clear_render_target = fd_clear_render_target;
 474         pctx->clear_depth_stencil = fd_clear_depth_stencil;
 475
 476         if (has_compute(fd_screen(pctx->screen))) {
 477                 pctx->launch_grid = fd_launch_grid;
 478         }
 479 }