swr/rast: Migrate memory pointers to gfxptr_t type
[mesa.git] / src / gallium / drivers / swr / swr_draw.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include "swr_screen.h"
25 #include "swr_context.h"
26 #include "swr_resource.h"
27 #include "swr_fence.h"
28 #include "swr_query.h"
29 #include "jit_api.h"
30
31 #include "util/u_draw.h"
32 #include "util/u_prim.h"
33
/*
 * Draw vertex arrays, with optional indexing, optional instancing.
 *
 * This is the pipe_context::draw_vbo hook.  It validates/derives state,
 * lazily JIT-compiles the streamout and fetch shaders (cached per
 * topology / fetch key), programs SWR frontend state, then issues the
 * draw through the SWR core API.
 */
static void
swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct swr_context *ctx = swr_context(pipe);

   /* Trivially reject draws with too few vertices for their topology,
    * unless the count comes from streamout/indirect or primitive restart
    * is active (in which case the count can't be trimmed here). */
   if (!info->count_from_stream_output && !info->indirect &&
       !info->primitive_restart &&
       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
      return;

   /* Honor conditional rendering (render-condition query). */
   if (!swr_check_render_cond(pipe))
      return;

   /* Indirect draws are unpacked into direct draws by the util helper,
    * which re-enters this function with info->indirect == NULL. */
   if (info->indirect) {
      util_draw_indirect(pipe, info);
      return;
   }

   /* Update derived state, pass draw info to update function */
   swr_update_derived(pipe, info);

   swr_update_draw_context(ctx);

   /* Stream output: JIT-compile (once per topology) and bind the SO
    * function when the vertex shader writes streamout outputs. */
   if (ctx->vs->pipe.stream_output.num_outputs) {
      if (!ctx->vs->soFunc[info->mode]) {
         STREAMOUT_COMPILE_STATE state = {0};
         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;

         state.numVertsPerPrim = u_vertices_per_prim(info->mode);

         /* Running dst offset per output buffer, used to detect holes. */
         uint32_t offsets[MAX_SO_STREAMS] = {0};
         uint32_t num = 0;

         for (uint32_t i = 0; i < so->num_outputs; i++) {
            assert(so->output[i].stream == 0); // @todo
            uint32_t output_buffer = so->output[i].output_buffer;
            if (so->output[i].dst_offset != offsets[output_buffer]) {
               // hole - need to fill
               state.stream.decl[num].bufferIndex = output_buffer;
               state.stream.decl[num].hole = true;
               state.stream.decl[num].componentMask =
                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
                  - 1;
               num++;
               offsets[output_buffer] = so->output[i].dst_offset;
            }

            /* Map the TGSI register index onto the SWR attrib slot. */
            unsigned attrib_slot = so->output[i].register_index;
            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);

            state.stream.decl[num].bufferIndex = output_buffer;
            state.stream.decl[num].attribSlot = attrib_slot;
            state.stream.decl[num].componentMask =
               ((1 << so->output[i].num_components) - 1)
               << so->output[i].start_component;
            state.stream.decl[num].hole = false;
            num++;

            offsets[output_buffer] += so->output[i].num_components;
         }

         state.stream.numDecls = num;

         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
         debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
      }

      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
   }

   /* Fetch shader: configure cut-index / partial-vertex-buffer state,
    * then look up (or JIT-compile and cache) the fetch function. */
   struct swr_vertex_element_state *velems = ctx->velems;
   if (info->primitive_restart)
      velems->fsState.cutIndex = info->restart_index;
   else
      velems->fsState.cutIndex = 0;
   velems->fsState.bEnableCutIndex = info->primitive_restart;
   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);

   swr_jit_fetch_key key;
   swr_generate_fetch_key(key, velems);
   auto search = velems->map.find(key);
   if (search != velems->map.end()) {
      velems->fsFunc = search->second;
   } else {
      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);

      debug_printf("fetch shader %p\n", velems->fsFunc);
      assert(velems->fsFunc && "Error: FetchShader = NULL");

      velems->map.insert(std::make_pair(key, velems->fsFunc));
   }

   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);

   /* Set up frontend state
    * XXX setup provokingVertex & topologyProvokingVertex */
   SWR_FRONTEND_STATE feState = {0};

   // feState.vsVertexSize seeds the PA size that is used as an interface
   // between all the shader stages, so it has to be large enough to
   // incorporate all interfaces between stages

   // max of gs and vs num_outputs
   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
   if (ctx->gs &&
       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
   }

   if (ctx->vs->info.base.num_outputs) {
      // gs does not adjust for position in SGV slot at input from vs
      if (!ctx->gs)
         feState.vsVertexSize--;
   }

   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;

   // The PA in the clipper does not handle BE vertex sizes
   // different from FE. Increase vertexsize only for the cases that needed it

   // primid needs a slot
   if (ctx->fs->info.base.uses_primid)
      feState.vsVertexSize++;
   // sprite coord enable
   if (ctx->rasterizer->sprite_coord_enable)
      feState.vsVertexSize++;


   /* Provoking-vertex selection (first vs. last vertex convention). */
   if (ctx->rasterizer->flatshade_first) {
      feState.provokingVertex = {1, 0, 0};
   } else {
      feState.provokingVertex = {2, 1, 2};
   }

   /* With a GS, the rasterized topology is the GS output primitive,
    * not the application's draw mode. */
   enum pipe_prim_type topology;
   if (ctx->gs)
      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
   else
      topology = info->mode;

   switch (topology) {
   case PIPE_PRIM_TRIANGLE_FAN:
      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
      break;
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLES:
      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
      break;
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_QUADS:
      if (ctx->rasterizer->flatshade_first)
         feState.topologyProvokingVertex = 0;
      else
         feState.topologyProvokingVertex = 3;
      break;
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_LOOP:
   case PIPE_PRIM_LINE_STRIP:
      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
      break;
   default:
      feState.topologyProvokingVertex = 0;
   }

   feState.bEnableCutIndex = info->primitive_restart;
   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);

   /* Dispatch the draw: indexed vs. non-indexed instanced paths. */
   if (info->index_size)
      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
                                          swr_convert_prim_topology(info->mode),
                                          info->count,
                                          info->instance_count,
                                          info->start,
                                          info->index_bias,
                                          info->start_instance);
   else
      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
                                   swr_convert_prim_topology(info->mode),
                                   info->count,
                                   info->instance_count,
                                   info->start,
                                   info->start_instance);

   /* On large client-buffer draw, we used client buffer directly, without
    * copy. Block until draw is finished.
    * VMD is an example application that benefits from this. */
   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
      struct swr_screen *screen = swr_screen(pipe->screen);
      swr_fence_submit(ctx, screen->flush_fence);
      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
   }
}
233
234
235 static void
236 swr_flush(struct pipe_context *pipe,
237 struct pipe_fence_handle **fence,
238 unsigned flags)
239 {
240 struct swr_context *ctx = swr_context(pipe);
241 struct swr_screen *screen = swr_screen(pipe->screen);
242 struct pipe_surface *cb = ctx->framebuffer.cbufs[0];
243
244 /* If the current renderTarget is the display surface, store tiles back to
245 * the surface, in preparation for present (swr_flush_frontbuffer).
246 * Other renderTargets get stored back when attachment changes or
247 * swr_surface_destroy */
248 if (cb && swr_resource(cb->texture)->display_target)
249 swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
250
251 if (fence)
252 swr_fence_reference(pipe->screen, fence, screen->flush_fence);
253 }
254
255 void
256 swr_finish(struct pipe_context *pipe)
257 {
258 struct pipe_fence_handle *fence = nullptr;
259
260 swr_flush(pipe, &fence, 0);
261 swr_fence_finish(pipe->screen, NULL, fence, 0);
262 swr_fence_reference(pipe->screen, &fence, NULL);
263 }
264
265 /*
266 * Invalidate tiles so they can be reloaded back when needed
267 */
268 void
269 swr_invalidate_render_target(struct pipe_context *pipe,
270 uint32_t attachment,
271 uint16_t width, uint16_t height)
272 {
273 struct swr_context *ctx = swr_context(pipe);
274
275 /* grab the rect from the passed in arguments */
276 swr_update_draw_context(ctx);
277 SWR_RECT full_rect =
278 {0, 0, (int32_t)width, (int32_t)height};
279 ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
280 1 << attachment,
281 full_rect);
282 }
283
284
285 /*
286 * Store SWR HotTiles back to renderTarget surface.
287 */
288 void
289 swr_store_render_target(struct pipe_context *pipe,
290 uint32_t attachment,
291 enum SWR_TILE_STATE post_tile_state)
292 {
293 struct swr_context *ctx = swr_context(pipe);
294 struct swr_draw_context *pDC = &ctx->swrDC;
295 struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
296
297 /* Only proceed if there's a valid surface to store to */
298 if (renderTarget->xpBaseAddress) {
299 swr_update_draw_context(ctx);
300 SWR_RECT full_rect =
301 {0, 0,
302 (int32_t)u_minify(renderTarget->width, renderTarget->lod),
303 (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
304 ctx->api.pfnSwrStoreTiles(ctx->swrContext,
305 1 << attachment,
306 post_tile_state,
307 full_rect);
308 }
309 }
310
311 void
312 swr_store_dirty_resource(struct pipe_context *pipe,
313 struct pipe_resource *resource,
314 enum SWR_TILE_STATE post_tile_state)
315 {
316 /* Only store resource if it has been written to */
317 if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
318 struct swr_context *ctx = swr_context(pipe);
319 struct swr_screen *screen = swr_screen(pipe->screen);
320 struct swr_resource *spr = swr_resource(resource);
321
322 swr_draw_context *pDC = &ctx->swrDC;
323 SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
324 for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
325 if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
326 (spr->secondary.xpBaseAddress &&
327 renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
328 swr_store_render_target(pipe, i, post_tile_state);
329
330 /* Mesa thinks depth/stencil are fused, so we'll never get an
331 * explicit resource for stencil. So, if checking depth, then
332 * also check for stencil. */
333 if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
334 swr_store_render_target(
335 pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
336 }
337
338 /* This fence signals StoreTiles completion */
339 swr_fence_submit(ctx, screen->flush_fence);
340
341 break;
342 }
343 }
344 }
345
346 void
347 swr_draw_init(struct pipe_context *pipe)
348 {
349 pipe->draw_vbo = swr_draw_vbo;
350 pipe->flush = swr_flush;
351 }