X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fswr%2Fswr_state.cpp;h=d7baa7127b495c479427ddcf6cd4657256bf8480;hb=575f8e8b60ab06e559a62ffe90913453352f32b4;hp=19d961f05aee55746603430b279538d5e3d2e6e3;hpb=08a466aec0b1baf54a7ca7b0d7d43bb267e01841;p=mesa.git diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 19d961f05ae..d7baa7127b4 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -31,6 +31,7 @@ #include "jit_api.h" #include "gen_state_llvm.h" #include "core/multisample.h" +#include "core/state_funcs.h" #include "gallivm/lp_bld_tgsi.h" #include "util/u_format.h" @@ -344,8 +345,10 @@ swr_create_vs_state(struct pipe_context *pipe, // soState.streamToRasterizer not used for (uint32_t i = 0; i < stream_output->num_outputs; i++) { + unsigned attrib_slot = stream_output->output[i].register_index; + attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs); swr_vs->soState.streamMasks[stream_output->output[i].stream] |= - 1 << (stream_output->output[i].register_index - 1); + (1 << attrib_slot); } for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { swr_vs->soState.streamNumEntries[i] = @@ -528,7 +531,7 @@ swr_create_vertex_elements_state(struct pipe_context *pipe, ? ComponentControl::StoreSrc : ComponentControl::Store1Fp; velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW; - velems->fsState.layout[i].InstanceDataStepRate = + velems->fsState.layout[i].InstanceAdvancementState = attribs[i].instance_divisor; /* Calculate the pitch of each stream */ @@ -792,7 +795,7 @@ swr_update_texture_state(struct swr_context *ctx, jit_tex->width = res->width0; jit_tex->height = res->height0; - jit_tex->base_ptr = swr->pBaseAddress; + jit_tex->base_ptr = (uint8_t*)swr->xpBaseAddress; if (view->target != PIPE_BUFFER) { jit_tex->first_level = view->u.tex.first_level; jit_tex->last_level = view->u.tex.last_level; @@ -899,7 +902,7 @@ swr_change_rt(struct swr_context *ctx, struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment]; /* Do nothing if the render target hasn't changed */ - if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr) + if ((!sf || !sf->texture) && (void*)(rt->xpBaseAddress) == nullptr) return false; /* Deal with disabling RT up front */ @@ -915,12 +918,12 @@ swr_change_rt(struct swr_context *ctx, const SWR_SURFACE_STATE *swr_surface = &swr->swr; SWR_FORMAT fmt = mesa_to_swr_format(sf->format); - if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) { + if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.xpBaseAddress) { swr_surface = &swr->secondary; fmt = swr_surface->format; } - if (rt->pBaseAddress == swr_surface->pBaseAddress && + if (rt->xpBaseAddress == swr_surface->xpBaseAddress && rt->format == fmt && rt->lod == sf->u.tex.level && rt->arrayIndex == sf->u.tex.first_layer) @@ -929,11 +932,16 @@ swr_change_rt(struct swr_context *ctx, bool need_fence = false; /* StoreTile for changed target */ - if (rt->pBaseAddress) { + if (rt->xpBaseAddress) { /* If changing attachment to a new target, mark tiles as * INVALID so they are reloaded from surface. */ swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID); need_fence = true; + } else { + /* if no previous attachment, invalidate tiles that may be marked + * RESOLVED because of an old attachment */ + swr_invalidate_render_target(&ctx->pipe, attachment, sf->width, sf->height); + /* no need to set fence here */ } /* Make new attachment */ @@ -945,6 +953,47 @@ swr_change_rt(struct swr_context *ctx, return need_fence; } +/* + * for cases where resources are shared between contexts, invalidate + * this ctx's resource. so it can be fetched fresh. Old ctx's resource + * is already stored during a flush + */ +static inline void +swr_invalidate_buffers_after_ctx_change(struct pipe_context *pipe) +{ + struct swr_context *ctx = swr_context(pipe); + + for (uint32_t i = 0; i < ctx->framebuffer.nr_cbufs; i++) { + struct pipe_surface *cb = ctx->framebuffer.cbufs[i]; + if (cb) { + struct swr_resource *res = swr_resource(cb->texture); + if (res->curr_pipe != pipe) { + /* if curr_pipe is NULL (first use), status should not be WRITE */ + assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE)); + if (res->status & SWR_RESOURCE_WRITE) { + swr_invalidate_render_target(pipe, i, cb->width, cb->height); + } + } + res->curr_pipe = pipe; + } + } + if (ctx->framebuffer.zsbuf) { + struct pipe_surface *zb = ctx->framebuffer.zsbuf; + if (zb) { + struct swr_resource *res = swr_resource(zb->texture); + if (res->curr_pipe != pipe) { + /* if curr_pipe is NULL (first use), status should not be WRITE */ + assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE)); + if (res->status & SWR_RESOURCE_WRITE) { + swr_invalidate_render_target(pipe, SWR_ATTACHMENT_DEPTH, zb->width, zb->height); + swr_invalidate_render_target(pipe, SWR_ATTACHMENT_STENCIL, zb->width, zb->height); + } + } + res->curr_pipe = pipe; + } + } +} + static inline void swr_user_vbuf_range(const struct pipe_draw_info *info, const struct swr_vertex_element_state *velems, @@ -963,8 +1012,8 @@ swr_user_vbuf_range(const struct pipe_draw_info *info, *size = elems * vb->stride; } else if (vb->stride) { elems = info->max_index - info->min_index + 1; - *totelems = info->max_index + 1; - *base = info->min_index * vb->stride; + *totelems = (info->max_index + info->index_bias) + 1; + *base = (info->min_index + info->index_bias) * vb->stride; *size = elems * vb->stride; } else { *totelems = 1; @@ -1025,13 +1074,15 @@ swr_update_derived(struct pipe_context *pipe, } /* Update screen->pipe to current pipe context. */ - if (screen->pipe != pipe) - screen->pipe = pipe; + screen->pipe = pipe; /* Any state that requires dirty flags to be re-triggered sets this mask */ /* For example, user_buffer vertex and index buffers. */ unsigned post_update_dirty_flags = 0; + /* bring resources that changed context up-to-date */ + swr_invalidate_buffers_after_ctx_change(pipe); + /* Render Targets */ if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { struct pipe_framebuffer_state *fb = &ctx->framebuffer; @@ -1147,23 +1198,10 @@ swr_update_derived(struct pipe_context *pipe, if (zb && swr_resource(zb->texture)->has_depth) rastState->depthFormat = swr_resource(zb->texture)->swr.format; - rastState->depthClipEnable = rasterizer->depth_clip; + rastState->depthClipEnable = rasterizer->depth_clip_near; rastState->clipHalfZ = rasterizer->clip_halfz; - rastState->clipDistanceMask = - ctx->vs->info.base.num_written_clipdistance ? - ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable : - rasterizer->clip_plane_enable; - - rastState->cullDistanceMask = - ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance; - - SwrSetRastState(ctx->swrContext, rastState); - } - - /* Scissor */ - if (ctx->dirty & SWR_NEW_SCISSOR) { - SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor); + ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState); } /* Viewport */ @@ -1203,40 +1241,29 @@ swr_update_derived(struct pipe_context *pipe, vp->width = std::min(vp->width, (float)fb->width - vp->x); vp->height = std::min(vp->height, (float)fb->height - vp->y); - SwrSetViewports(ctx->swrContext, 1, vp, vpm); + ctx->api.pfnSwrSetViewports(ctx->swrContext, 1, vp, vpm); } - /* Set vertex & index buffers */ - /* (using draw info if called by swr_draw_vbo) */ - /* TODO: This is always true, because the index buffer comes from - * pipe_draw_info. - */ - if (1 || ctx->dirty & SWR_NEW_VERTEX) { - uint32_t scratch_total; - uint8_t *scratch = NULL; - - /* If being called by swr_draw_vbo, copy draw details */ - struct pipe_draw_info info = {0}; - if (p_draw_info) - info = *p_draw_info; - - /* We must get all the scratch space in one go */ - scratch_total = 0; - for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { - struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; - - if (!vb->is_user_buffer) - continue; + /* When called from swr_clear (p_draw_info = null), render targets, + * rasterState and viewports (dependent on render targets) are the only + * necessary validation. Defer remaining validation by setting + * post_update_dirty_flags and clear all dirty flags. BackendState is + * still unconditionally validated below */ + if (!p_draw_info) { + post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER | + SWR_NEW_RASTERIZER | + SWR_NEW_VIEWPORT); + ctx->dirty = 0; + } - uint32_t elems, base, size; - swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size); - scratch_total += AlignUp(size, 4); - } + /* Scissor */ + if (ctx->dirty & SWR_NEW_SCISSOR) { + ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor); + } - if (scratch_total) { - scratch = (uint8_t *)swr_copy_to_scratch_space( - ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total); - } + /* Set vertex & index buffers */ + if (ctx->dirty & SWR_NEW_VERTEX) { + const struct pipe_draw_info &info = *p_draw_info; /* vertex buffers */ SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; @@ -1277,28 +1304,35 @@ swr_update_derived(struct pipe_context *pipe, uint32_t base; swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size); partial_inbounds = 0; - min_vertex_index = info.min_index; + min_vertex_index = info.min_index + info.index_bias; - /* Copy only needed vertices to scratch space */ size = AlignUp(size, 4); - const void *ptr = (const uint8_t *) vb->buffer.user + base; - memcpy(scratch, ptr, size); - ptr = scratch; - scratch += size; - p_data = (const uint8_t *)ptr - base; + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW; + p_data = (const uint8_t *) vb->buffer.user; + } else { + /* Copy only needed vertices to scratch space */ + const void *ptr = (const uint8_t *) vb->buffer.user + base; + ptr = (uint8_t *)swr_copy_to_scratch_space( + ctx, &ctx->scratch->vertex_buffer, ptr, size); + p_data = (const uint8_t *)ptr - base; + } } swrVertexBuffers[i] = {0}; swrVertexBuffers[i].index = i; swrVertexBuffers[i].pitch = pitch; - swrVertexBuffers[i].pData = p_data; + swrVertexBuffers[i].xpData = (gfxptr_t) p_data; swrVertexBuffers[i].size = size; swrVertexBuffers[i].minVertex = min_vertex_index; swrVertexBuffers[i].maxVertex = elems; swrVertexBuffers[i].partialInboundsSize = partial_inbounds; } - SwrSetVertexBuffers( + ctx->api.pfnSwrSetVertexBuffers( ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers); /* index buffer, if required (info passed in by swr_draw_vbo) */ @@ -1307,7 +1341,7 @@ swr_update_derived(struct pipe_context *pipe, const uint8_t *p_data; uint32_t size, pitch; - pitch = p_draw_info->index_size ? p_draw_info->index_size : sizeof(uint32_t); + pitch = info.index_size ? info.index_size : sizeof(uint32_t); index_type = swr_convert_index_type(pitch); if (!info.has_user_indices) { @@ -1324,20 +1358,27 @@ swr_update_derived(struct pipe_context *pipe, size = info.count * pitch; size = AlignUp(size, 4); - - /* Copy indices to scratch space */ - const void *ptr = info.index.user; - ptr = swr_copy_to_scratch_space( - ctx, &ctx->scratch->index_buffer, ptr, size); - p_data = (const uint8_t *)ptr; + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW; + p_data = (const uint8_t *) info.index.user; + } else { + /* Copy indices to scratch space */ + const void *ptr = info.index.user; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->index_buffer, ptr, size); + p_data = (const uint8_t *)ptr; + } } SWR_INDEX_BUFFER_STATE swrIndexBuffer; - swrIndexBuffer.format = swr_convert_index_type(p_draw_info->index_size); - swrIndexBuffer.pIndices = p_data; + swrIndexBuffer.format = swr_convert_index_type(info.index_size); + swrIndexBuffer.xpIndices = (gfxptr_t) p_data; swrIndexBuffer.size = size; - SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); + ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); } struct swr_vertex_element_state *velems = ctx->velems; @@ -1362,7 +1403,7 @@ swr_update_derived(struct pipe_context *pipe, } else { func = swr_compile_gs(ctx, key); } - SwrSetGsFunc(ctx->swrContext, func); + ctx->api.pfnSwrSetGsFunc(ctx->swrContext, func); /* JIT sampler state */ if (ctx->dirty & SWR_NEW_SAMPLER) { @@ -1380,11 +1421,11 @@ swr_update_derived(struct pipe_context *pipe, ctx->swrDC.texturesGS); } - SwrSetGsState(ctx->swrContext, &ctx->gs->gsState); + ctx->api.pfnSwrSetGsState(ctx->swrContext, &ctx->gs->gsState); } else { SWR_GS_STATE state = { 0 }; - SwrSetGsState(ctx->swrContext, &state); - SwrSetGsFunc(ctx->swrContext, NULL); + ctx->api.pfnSwrSetGsState(ctx->swrContext, &state); + ctx->api.pfnSwrSetGsFunc(ctx->swrContext, NULL); } } @@ -1403,7 +1444,7 @@ swr_update_derived(struct pipe_context *pipe, } else { func = swr_compile_vs(ctx, key); } - SwrSetVertexFunc(ctx->swrContext, func); + ctx->api.pfnSwrSetVertexFunc(ctx->swrContext, func); /* JIT sampler state */ if (ctx->dirty & SWR_NEW_SAMPLER) { @@ -1459,7 +1500,7 @@ swr_update_derived(struct pipe_context *pipe, psState.writesODepth = ctx->fs->info.base.writes_z; psState.usesSourceDepth = ctx->fs->info.base.reads_z; psState.shadingRate = SWR_SHADING_RATE_PIXEL; - psState.numRenderTargets = ctx->framebuffer.nr_cbufs; + psState.renderTargetMask = (1 << ctx->framebuffer.nr_cbufs) - 1; psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; uint32_t barycentricsMask = 0; #if 0 @@ -1491,7 +1532,7 @@ swr_update_derived(struct pipe_context *pipe, psState.barycentricsMask = barycentricsMask; psState.usesUAV = false; // XXX psState.forceEarlyZ = false; - SwrSetPixelShaderState(ctx->swrContext, &psState); + ctx->api.pfnSwrSetPixelShaderState(ctx->swrContext, &psState); /* JIT sampler state */ if (ctx->dirty & (SWR_NEW_SAMPLER | @@ -1580,12 +1621,12 @@ swr_update_derived(struct pipe_context *pipe, depthStencilState.depthTestEnable = depth->enabled; depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func); depthStencilState.depthWriteEnable = depth->writemask; - SwrSetDepthStencilState(ctx->swrContext, &depthStencilState); + ctx->api.pfnSwrSetDepthStencilState(ctx->swrContext, &depthStencilState); depthBoundsState.depthBoundsTestEnable = depth->bounds_test; depthBoundsState.depthBoundsTestMinValue = depth->bounds_min; depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max; - SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState); + ctx->api.pfnSwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState); } /* Blend State */ @@ -1614,7 +1655,7 @@ swr_update_derived(struct pipe_context *pipe, blendState.renderTarget[0].writeDisableGreen = 1; blendState.renderTarget[0].writeDisableBlue = 1; blendState.renderTarget[0].writeDisableAlpha = 1; - SwrSetBlendFunc(ctx->swrContext, 0, NULL); + ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, 0, NULL); } else for (int target = 0; @@ -1646,7 +1687,7 @@ swr_update_derived(struct pipe_context *pipe, if (compileState.blendState.blendEnable == false && compileState.blendState.logicOpEnable == false && ctx->depth_stencil->alpha.enabled == 0) { - SwrSetBlendFunc(ctx->swrContext, target, NULL); + ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, NULL); continue; } @@ -1682,10 +1723,10 @@ swr_update_derived(struct pipe_context *pipe, ctx->blendJIT->insert(std::make_pair(compileState, func)); } - SwrSetBlendFunc(ctx->swrContext, target, func); + ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, func); } - SwrSetBlendState(ctx->swrContext, &blendState); + ctx->api.pfnSwrSetBlendState(ctx->swrContext, &blendState); } if (ctx->dirty & SWR_NEW_STIPPLE) { @@ -1695,7 +1736,7 @@ swr_update_derived(struct pipe_context *pipe, if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) { ctx->vs->soState.rasterizerDisable = ctx->rasterizer->rasterizer_discard; - SwrSetSoState(ctx->swrContext, &ctx->vs->soState); + ctx->api.pfnSwrSetSoState(ctx->swrContext, &ctx->vs->soState); pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output; @@ -1705,13 +1746,13 @@ swr_update_derived(struct pipe_context *pipe, continue; buffer.enable = true; buffer.pBuffer = - (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) + + (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) + ctx->so_targets[i]->buffer_offset); buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; buffer.pitch = stream_output->stride[i]; buffer.streamOffset = 0; - SwrSetSoBuffers(ctx->swrContext, &buffer, i); + ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i); } } @@ -1755,14 +1796,26 @@ swr_update_derived(struct pipe_context *pipe, (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0); backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask; - if (ctx->gs) - backendState.readRenderTargetArrayIndex = - ctx->gs->info.base.writes_layer; - else - backendState.readRenderTargetArrayIndex = - ctx->vs->info.base.writes_layer; + struct tgsi_shader_info *pLastFE = + ctx->gs ? + &ctx->gs->info.base : + &ctx->vs->info.base; + backendState.readRenderTargetArrayIndex = pLastFE->writes_layer; + backendState.readViewportArrayIndex = pLastFE->writes_viewport_index; + backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize + + backendState.clipDistanceMask = + ctx->vs->info.base.num_written_clipdistance ? + ctx->vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : + ctx->rasterizer->clip_plane_enable; + + backendState.cullDistanceMask = + ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance; + + // Assume old layout of SGV, POSITION, CLIPCULL, ATTRIB + backendState.vertexClipCullOffset = backendState.vertexAttribOffset - 2; - SwrSetBackendState(ctx->swrContext, &backendState); + ctx->api.pfnSwrSetBackendState(ctx->swrContext, &backendState); /* Ensure that any in-progress attachment change StoreTiles finish */ if (swr_is_fence_pending(screen->flush_fence))