src/gallium/drivers/freedreno/freedreno_draw.c

   1 /*
   2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "pipe/p_state.h"
  28 #include "util/u_draw.h"
  29 #include "util/u_string.h"
  30 #include "util/u_memory.h"
  31 #include "util/u_prim.h"
  32 #include "util/format/u_format.h"
  33 #include "util/u_helpers.h"
  34
  35 #include "freedreno_blitter.h"
  36 #include "freedreno_draw.h"
  37 #include "freedreno_context.h"
  38 #include "freedreno_fence.h"
  39 #include "freedreno_state.h"
  40 #include "freedreno_resource.h"
  41 #include "freedreno_query_acc.h"
  42 #include "freedreno_query_hw.h"
  43 #include "freedreno_util.h"
  44
  45 static void
  46 resource_read(struct fd_batch *batch, struct pipe_resource *prsc)
  47 {
  48         if (!prsc)
  49                 return;
  50         fd_batch_resource_used(batch, fd_resource(prsc), false);
  51 }
  52
  53 static void
  54 resource_written(struct fd_batch *batch, struct pipe_resource *prsc)
  55 {
  56         if (!prsc)
  57                 return;
  58         fd_batch_resource_used(batch, fd_resource(prsc), true);
  59 }
  60
/**
 * pipe_context::draw_vbo hook (installed by fd_draw_init()).
 *
 * Returns without drawing when the debug indirect-draw emulation or the
 * primitive-conversion path takes over, when u_trim_pipe_prim() or the
 * render-condition check fails, or when uploading a user index buffer
 * fails.  Otherwise it records, under ctx->screen->lock, every resource
 * the draw reads or writes on the current batch, updates the sw statistics
 * and the batch restore/resolve masks, and hands the draw to the
 * ctx->draw_vbo() hook.
 *
 * @pctx: context the draw was issued on
 * @info: draw parameters; replaced locally by a copy (new_info) pointing at
 *        the uploaded index buffer when user indices are supplied
 */
static void
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct fd_context *ctx = fd_context(pctx);
        struct fd_batch *batch = fd_context_batch(ctx);
        struct pipe_framebuffer_state *pfb = &batch->framebuffer;
        unsigned i, prims, buffers = 0, restore_buffers = 0;

        /* for debugging problems with indirect draw, it is convenient
         * to be able to emulate it, to determine if game is feeding us
         * bogus data:
         */
        if (info->indirect && (fd_mesa_debug & FD_DBG_NOINDR)) {
                util_draw_indirect(pctx, info);
                return;
        }

        /* NOTE(review): the cast strips const so that u_trim_pipe_prim()
         * receives a writable pointer to info->count; presumably it trims
         * the count to whole primitives and returns false when nothing is
         * left to draw -- confirm against util/u_prim.h.
         */
        if (!info->count_from_stream_output && !info->indirect &&
            !info->primitive_restart &&
            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
                return;

        /* TODO: push down the region versions into the tiles */
        if (!fd_render_condition_check(pctx))
                return;

        /* emulate unsupported primitives: */
        if (!fd_supported_prim(ctx, info->mode)) {
                if (ctx->streamout.num_targets > 0)
                        debug_error("stream-out with emulated prims");
                util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
                util_primconvert_draw_vbo(ctx->primconvert, info);
                return;
        }

        /* Reset ctx->last_fence to NULL via fd_fence_ref(). */
        fd_fence_ref(&ctx->last_fence, NULL);

        /* Upload a user index buffer. */
        struct pipe_resource *indexbuf = NULL;
        unsigned index_offset = 0;
        struct pipe_draw_info new_info;
        if (info->index_size) {
                if (info->has_user_indices) {
                        if (!util_upload_index_buffer(pctx, info, &indexbuf, &index_offset))
                                return;
                        /* From here on 'info' refers to a local copy that
                         * points at the uploaded buffer; the 'info == &new_info'
                         * test at the end of the function relies on this.
                         */
                        new_info = *info;
                        new_info.index.resource = indexbuf;
                        new_info.has_user_indices = false;
                        info = &new_info;
                } else {
                        indexbuf = info->index.resource;
                }
        }

        /* Inside a blit: reset the batch and mark all state dirty. */
        if (ctx->in_blit) {
                fd_batch_reset(batch);
                fd_context_all_dirty(ctx);
        }

        batch->blit = ctx->in_blit;
        batch->back_blit = ctx->in_shadow;

        /* NOTE: needs to be before resource_written(batch->query_buf), otherwise
         * query_buf may not be created yet.
         */
        fd_batch_set_stage(batch, FD_STAGE_DRAW);

        /*
         * Figure out the buffers/features we need:
         */

        /* All resource_read()/resource_written() calls below are made with
         * the screen lock held.
         */
        mtx_lock(&ctx->screen->lock);

        if (ctx->dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA)) {
                if (fd_depth_enabled(ctx)) {
                        /* ->valid set: candidate for restore (see batch->restore
                         * below); otherwise the buffer is flagged invalidated.
                         */
                        if (fd_resource(pfb->zsbuf->texture)->valid) {
                                restore_buffers |= FD_BUFFER_DEPTH;
                        } else {
                                batch->invalidated |= FD_BUFFER_DEPTH;
                        }
                        batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
                        if (fd_depth_write_enabled(ctx)) {
                                buffers |= FD_BUFFER_DEPTH;
                                resource_written(batch, pfb->zsbuf->texture);
                        } else {
                                resource_read(batch, pfb->zsbuf->texture);
                        }
                }

                if (fd_stencil_enabled(ctx)) {
                        if (fd_resource(pfb->zsbuf->texture)->valid) {
                                restore_buffers |= FD_BUFFER_STENCIL;
                        } else {
                                batch->invalidated |= FD_BUFFER_STENCIL;
                        }
                        batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
                        buffers |= FD_BUFFER_STENCIL;
                        resource_written(batch, pfb->zsbuf->texture);
                }
        }

        if (fd_logicop_enabled(ctx))
                batch->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;

        /* Color buffers: unbound slots are skipped; the per-slot bit is
         * PIPE_CLEAR_COLOR0 shifted by the slot index.
         */
        for (i = 0; i < pfb->nr_cbufs; i++) {
                struct pipe_resource *surf;

                if (!pfb->cbufs[i])
                        continue;

                surf = pfb->cbufs[i]->texture;

                if (fd_resource(surf)->valid) {
                        restore_buffers |= PIPE_CLEAR_COLOR0 << i;
                } else {
                        batch->invalidated |= PIPE_CLEAR_COLOR0 << i;
                }

                buffers |= PIPE_CLEAR_COLOR0 << i;

                if (fd_blend_enabled(ctx, i))
                        batch->gmem_reason |= FD_GMEM_BLEND_ENABLED;

                /* Write tracking is only redone when the framebuffer state
                 * is dirty.
                 */
                if (ctx->dirty & FD_DIRTY_FRAMEBUFFER)
                        resource_written(batch, pfb->cbufs[i]->texture);
        }

        /* Mark SSBOs as being written.. we don't actually know which ones are
         * read vs written, so just assume the worst
         */
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_SSBO) {
                foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].enabled_mask)
                                resource_written(batch, ctx->shaderbuf[PIPE_SHADER_FRAGMENT].sb[i].buffer);
        }

        /* Images: written if bound with write access, otherwise read. */
        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_IMAGE) {
                foreach_bit(i, ctx->shaderimg[PIPE_SHADER_FRAGMENT].enabled_mask) {
                        struct pipe_image_view *img =
                                        &ctx->shaderimg[PIPE_SHADER_FRAGMENT].si[i];
                        if (img->access & PIPE_IMAGE_ACCESS_WRITE)
                                resource_written(batch, img->resource);
                        else
                                resource_read(batch, img->resource);
                }
        }

        /* Mark vertex/fragment constant buffers as being read */
        if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_CONST) {
                foreach_bit(i, ctx->constbuf[PIPE_SHADER_VERTEX].enabled_mask)
                        resource_read(batch, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
        }

        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_CONST) {
                foreach_bit(i, ctx->constbuf[PIPE_SHADER_FRAGMENT].enabled_mask)
                        resource_read(batch, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
        }

        /* Mark VBOs as being read */
        if (ctx->dirty & FD_DIRTY_VTXBUF) {
                foreach_bit(i, ctx->vtx.vertexbuf.enabled_mask) {
                        assert(!ctx->vtx.vertexbuf.vb[i].is_user_buffer);
                        resource_read(batch, ctx->vtx.vertexbuf.vb[i].buffer.resource);
                }
        }

        /* Mark index buffer as being read (indexbuf is NULL for non-indexed
         * draws, which resource_read() ignores)
         */
        resource_read(batch, indexbuf);

        /* Mark indirect draw buffer as being read */
        if (info->indirect)
                resource_read(batch, info->indirect->buffer);

        /* Mark textures as being read */
        if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX) {
                foreach_bit(i, ctx->tex[PIPE_SHADER_VERTEX].valid_textures)
                        resource_read(batch, ctx->tex[PIPE_SHADER_VERTEX].textures[i]->texture);
        }

        if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX) {
                foreach_bit(i, ctx->tex[PIPE_SHADER_FRAGMENT].valid_textures)
                        resource_read(batch, ctx->tex[PIPE_SHADER_FRAGMENT].textures[i]->texture);
        }

        /* Mark streamout buffers as being written.. */
        if (ctx->dirty & FD_DIRTY_STREAMOUT) {
                for (i = 0; i < ctx->streamout.num_targets; i++)
                        if (ctx->streamout.targets[i])
                                resource_written(batch, ctx->streamout.targets[i]->buffer);
        }

        /* Query result storage: the batch's query_buf and the buffer of
         * every active accumulated query are tracked as written.
         */
        resource_written(batch, batch->query_buf);

        list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
                resource_written(batch, aq->prsc);

        mtx_unlock(&ctx->screen->lock);

        batch->num_draws++;

        /* Counting prims in sw doesn't work for GS and tessellation. For older
         * gens we don't have those stages and don't have the hw counters enabled,
         * so keep the count accurate for non-patch geometry.
         */
        if (info->mode != PIPE_PRIM_PATCHES)
                prims = u_reduced_prims_for_vertices(info->mode, info->count);
        else
                prims = 0;

        ctx->stats.draw_calls++;

        /* TODO prims_emitted should be clipped when the stream-out buffer is
         * not large enough.  See max_tf_vtx().. probably need to move that
         * into common code.  Although a bit more annoying since a2xx doesn't
         * use ir3 so no common way to get at the pipe_stream_output_info
         * which is needed for this calculation.
         */
        if (ctx->streamout.num_targets > 0)
                ctx->stats.prims_emitted += prims;
        ctx->stats.prims_generated += prims;

        /* any buffers that haven't been cleared yet, we need to restore: */
        batch->restore |= restore_buffers & (FD_BUFFER_ALL & ~batch->invalidated);
        /* and any buffers used, need to be resolved: */
        batch->resolve |= buffers;

        DBG("%p: %x %ux%u num_draws=%u (%s/%s)", batch, buffers,
                pfb->width, pfb->height, batch->num_draws,
                util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                util_format_short_name(pipe_surface_format(pfb->zsbuf)));

        /* Dispatch to the ctx->draw_vbo() hook; a true return marks the
         * batch as needing a flush.
         */
        if (ctx->draw_vbo(ctx, info, index_offset))
                batch->needs_flush = true;

        batch->num_vertices += info->count * info->instance_count;

        /* Advance every bound streamout target's offset by the draw's count. */
        for (i = 0; i < ctx->streamout.num_targets; i++)
                ctx->streamout.offsets[i] += info->count;

        /* Debug option: mark all state dirty after every draw. */
        if (fd_mesa_debug & FD_DBG_DDRAW)
                fd_context_all_dirty(ctx);

        fd_batch_check_size(batch);

        /* info only equals &new_info on the user-index upload path, so this
         * releases the buffer obtained from util_upload_index_buffer().
         */
        if (info == &new_info)
                pipe_resource_reference(&indexbuf, NULL);
}
 306
 307 static void
 308 fd_clear(struct pipe_context *pctx, unsigned buffers,
 309                 const union pipe_color_union *color, double depth, unsigned stencil)
 310 {
 311         struct fd_context *ctx = fd_context(pctx);
 312         struct fd_batch *batch = fd_context_batch(ctx);
 313         struct pipe_framebuffer_state *pfb = &batch->framebuffer;
 314         unsigned cleared_buffers;
 315         int i;
 316
 317         /* TODO: push down the region versions into the tiles */
 318         if (!fd_render_condition_check(pctx))
 319                 return;
 320
 321         fd_fence_ref(&ctx->last_fence, NULL);
 322
 323         if (ctx->in_blit) {
 324                 fd_batch_reset(batch);
 325                 fd_context_all_dirty(ctx);
 326         }
 327
 328         /* pctx->clear() is only for full-surface clears, so scissor is
 329          * equivalent to having GL_SCISSOR_TEST disabled:
 330          */
 331         batch->max_scissor.minx = 0;
 332         batch->max_scissor.miny = 0;
 333         batch->max_scissor.maxx = pfb->width;
 334         batch->max_scissor.maxy = pfb->height;
 335
 336         /* for bookkeeping about which buffers have been cleared (and thus
 337          * can fully or partially skip mem2gmem) we need to ignore buffers
 338          * that have already had a draw, in case apps do silly things like
 339          * clear after draw (ie. if you only clear the color buffer, but
 340          * something like alpha-test causes side effects from the draw in
 341          * the depth buffer, etc)
 342          */
 343         cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore);
 344         batch->cleared |= buffers;
 345         batch->invalidated |= cleared_buffers;
 346
 347         batch->resolve |= buffers;
 348         batch->needs_flush = true;
 349
 350         mtx_lock(&ctx->screen->lock);
 351
 352         if (buffers & PIPE_CLEAR_COLOR)
 353                 for (i = 0; i < pfb->nr_cbufs; i++)
 354                         if (buffers & (PIPE_CLEAR_COLOR0 << i))
 355                                 resource_written(batch, pfb->cbufs[i]->texture);
 356
 357         if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
 358                 resource_written(batch, pfb->zsbuf->texture);
 359                 batch->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
 360         }
 361
 362         resource_written(batch, batch->query_buf);
 363
 364         list_for_each_entry(struct fd_acc_query, aq, &ctx->acc_active_queries, node)
 365                 resource_written(batch, aq->prsc);
 366
 367         mtx_unlock(&ctx->screen->lock);
 368
 369         DBG("%p: %x %ux%u depth=%f, stencil=%u (%s/%s)", batch, buffers,
 370                 pfb->width, pfb->height, depth, stencil,
 371                 util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 372                 util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 373
 374         /* if per-gen backend doesn't implement ctx->clear() generic
 375          * blitter clear:
 376          */
 377         bool fallback = true;
 378
 379         if (ctx->clear) {
 380                 fd_batch_set_stage(batch, FD_STAGE_CLEAR);
 381
 382                 if (ctx->clear(ctx, buffers, color, depth, stencil)) {
 383                         if (fd_mesa_debug & FD_DBG_DCLEAR)
 384                                 fd_context_all_dirty(ctx);
 385
 386                         fallback = false;
 387                 }
 388         }
 389
 390         if (fallback) {
 391                 fd_blitter_clear(pctx, buffers, color, depth, stencil);
 392         }
 393
 394         fd_batch_check_size(batch);
 395 }
 396
 397 static void
 398 fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
 399                 const union pipe_color_union *color,
 400                 unsigned x, unsigned y, unsigned w, unsigned h,
 401                 bool render_condition_enabled)
 402 {
 403         DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
 404 }
 405
 406 static void
 407 fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
 408                 unsigned buffers, double depth, unsigned stencil,
 409                 unsigned x, unsigned y, unsigned w, unsigned h,
 410                 bool render_condition_enabled)
 411 {
 412         DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
 413                         buffers, depth, stencil, x, y, w, h);
 414 }
 415
 416 static void
 417 fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
 418 {
 419         struct fd_context *ctx = fd_context(pctx);
 420         struct fd_batch *batch, *save_batch = NULL;
 421         unsigned i;
 422
 423         batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true);
 424         fd_batch_reference(&save_batch, ctx->batch);
 425         fd_batch_reference(&ctx->batch, batch);
 426         fd_context_all_dirty(ctx);
 427
 428         mtx_lock(&ctx->screen->lock);
 429
 430         /* Mark SSBOs as being written.. we don't actually know which ones are
 431          * read vs written, so just assume the worst
 432          */
 433         foreach_bit(i, ctx->shaderbuf[PIPE_SHADER_COMPUTE].enabled_mask)
 434                 resource_written(batch, ctx->shaderbuf[PIPE_SHADER_COMPUTE].sb[i].buffer);
 435
 436         foreach_bit(i, ctx->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
 437                 struct pipe_image_view *img =
 438                         &ctx->shaderimg[PIPE_SHADER_COMPUTE].si[i];
 439                 if (img->access & PIPE_IMAGE_ACCESS_WRITE)
 440                         resource_written(batch, img->resource);
 441                 else
 442                         resource_read(batch, img->resource);
 443         }
 444
 445         /* UBO's are read */
 446         foreach_bit(i, ctx->constbuf[PIPE_SHADER_COMPUTE].enabled_mask)
 447                 resource_read(batch, ctx->constbuf[PIPE_SHADER_COMPUTE].cb[i].buffer);
 448
 449         /* Mark textures as being read */
 450         foreach_bit(i, ctx->tex[PIPE_SHADER_COMPUTE].valid_textures)
 451                 resource_read(batch, ctx->tex[PIPE_SHADER_COMPUTE].textures[i]->texture);
 452
 453         /* For global buffers, we don't really know if read or written, so assume
 454          * the worst:
 455          */
 456         foreach_bit(i, ctx->global_bindings.enabled_mask)
 457                 resource_written(batch, ctx->global_bindings.buf[i]);
 458
 459         if (info->indirect)
 460                 resource_read(batch, info->indirect);
 461
 462         mtx_unlock(&ctx->screen->lock);
 463
 464         batch->needs_flush = true;
 465         ctx->launch_grid(ctx, info);
 466
 467         fd_batch_flush(batch, false);
 468
 469         fd_batch_reference(&ctx->batch, save_batch);
 470         fd_context_all_dirty(ctx);
 471         fd_batch_reference(&save_batch, NULL);
 472         fd_batch_reference(&batch, NULL);
 473 }
 474
 475 void
 476 fd_draw_init(struct pipe_context *pctx)
 477 {
 478         pctx->draw_vbo = fd_draw_vbo;
 479         pctx->clear = fd_clear;
 480         pctx->clear_render_target = fd_clear_render_target;
 481         pctx->clear_depth_stencil = fd_clear_depth_stencil;
 482
 483         if (has_compute(fd_screen(pctx->screen))) {
 484                 pctx->launch_grid = fd_launch_grid;
 485         }
 486 }