X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fswr%2Fswr_context.cpp;h=b61720cd3000fcca25e1457e0103fcca49342d8e;hb=a700a82bdac19433533ccf31ab635350cb58203b;hp=6c0782a40d31188760d3f819776baeeb0438c1fc;hpb=c8a51fa75d2642d71ac23a04f37f570613ac9a87;p=mesa.git diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 6c0782a40d3..b61720cd300 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -21,16 +21,6 @@ * IN THE SOFTWARE. ***************************************************************************/ -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_atomic.h" - -extern "C" { -#include "util/u_transfer.h" -#include "util/u_surface.h" -} - #include "swr_context.h" #include "swr_memory.h" #include "swr_screen.h" @@ -39,8 +29,17 @@ extern "C" { #include "swr_query.h" #include "swr_fence.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_atomic.h" +#include "util/u_upload_mgr.h" +#include "util/u_transfer.h" +#include "util/u_surface.h" + #include "api.h" #include "backend.h" +#include "knobs.h" static struct pipe_surface * swr_create_surface(struct pipe_context *pipe, @@ -62,10 +61,6 @@ swr_create_surface(struct pipe_context *pipe, ps->u.tex.level = surf_tmpl->u.tex.level; ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - if (ps->u.tex.first_layer != ps->u.tex.last_layer) { - debug_printf("creating surface with multiple layers, rendering " - "to first layer only\n"); - } } else { /* setting width as number of elements should get us correct * renderbuffer width */ @@ -139,50 +134,102 @@ swr_transfer_map(struct pipe_context *pipe, if (!pt) return NULL; pipe_resource_reference(&pt->resource, resource); + pt->usage = (pipe_transfer_usage)usage; pt->level = level; pt->box = *box; - pt->stride = spr->row_stride[level]; - pt->layer_stride = spr->img_stride[level]; - - /* if we're mapping the depth/stencil, copy in stencil */ - if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT - && spr->has_stencil) { - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { - spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; - } - } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT - && spr->has_stencil) { - for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { - spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; + pt->stride = spr->swr.pitch; + pt->layer_stride = spr->swr.qpitch * spr->swr.pitch; + + /* if we're mapping the depth/stencil, copy in stencil for the section + * being read in + */ + if (usage & PIPE_TRANSFER_READ && spr->has_depth && spr->has_stencil) { + size_t zbase, sbase; + for (int z = box->z; z < box->z + box->depth; z++) { + zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch + + spr->mip_offsets[level]; + sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch + + spr->secondary_mip_offsets[level]; + for (int y = box->y; y < box->y + box->height; y++) { + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { + for (int x = box->x; x < box->x + box->width; x++) + ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3] = + ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x]; + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + for (int x = box->x; x < box->x + box->width; x++) + ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4] = + ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x]; + } + zbase += spr->swr.pitch; + sbase += spr->secondary.pitch; + } } } - unsigned offset = box->z * pt->layer_stride + box->y * pt->stride - + box->x * util_format_get_blocksize(format); + unsigned offset = box->z * pt->layer_stride + + util_format_get_nblocksy(format, box->y) * pt->stride + + util_format_get_stride(format, box->x); *transfer = pt; - return spr->swr.pBaseAddress + offset + spr->mip_offsets[level]; + return (void*)(spr->swr.xpBaseAddress + offset + spr->mip_offsets[level]); } static void -swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +swr_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *flush_box) { assert(transfer->resource); + assert(transfer->usage & PIPE_TRANSFER_WRITE); - struct swr_resource *res = swr_resource(transfer->resource); - /* if we're mapping the depth/stencil, copy out stencil */ - if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT - && res->has_stencil) { - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3]; - } - } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT - && res->has_stencil) { - for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { - res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4]; + struct swr_resource *spr = swr_resource(transfer->resource); + if (!spr->has_depth || !spr->has_stencil) + return; + + size_t zbase, sbase; + struct pipe_box box = *flush_box; + box.x += transfer->box.x; + box.y += transfer->box.y; + box.z += transfer->box.z; + for (int z = box.z; z < box.z + box.depth; z++) { + zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch + + spr->mip_offsets[transfer->level]; + sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch + + spr->secondary_mip_offsets[transfer->level]; + for (int y = box.y; y < box.y + box.height; y++) { + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { + for (int x = box.x; x < box.x + box.width; x++) + ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] = + ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3]; + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + for (int x = box.x; x < box.x + box.width; x++) + ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] = + ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4]; + } + zbase += spr->swr.pitch; + sbase += spr->secondary.pitch; } } +} + +static void +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +{ + assert(transfer->resource); + + struct swr_resource *spr = swr_resource(transfer->resource); + /* if we're mapping the depth/stencil, copy in stencil for the section + * being written out + */ + if (transfer->usage & PIPE_TRANSFER_WRITE && + !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && + spr->has_depth && spr->has_stencil) { + struct pipe_box box; + u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, + transfer->box.depth, &box); + swr_transfer_flush_region(pipe, transfer, &box); + } pipe_resource_reference(&transfer->resource, NULL); FREE(transfer); @@ -225,16 +272,27 @@ static void swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) { struct swr_context *ctx = swr_context(pipe); + /* Make a copy of the const blit_info, so we can modify it */ struct pipe_blit_info info = *blit_info; - if (blit_info->render_condition_enable && !swr_check_render_cond(pipe)) + if (info.render_condition_enable && !swr_check_render_cond(pipe)) return; if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 && !util_format_is_depth_or_stencil(info.src.resource->format) && !util_format_is_pure_integer(info.src.resource->format)) { - debug_printf("swr: color resolve unimplemented\n"); - return; + debug_printf("swr_blit: color resolve : %d -> %d\n", + info.src.resource->nr_samples, info.dst.resource->nr_samples); + + /* Resolve is done as part of the surface store. */ + swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED); + + struct pipe_resource *src_resource = info.src.resource; + struct pipe_resource *resolve_target = + swr_resource(src_resource)->resolve_target; + + /* The resolve target becomes the new source for the blit. */ + info.src.resource = resolve_target; } if (util_try_blit_via_copy_region(pipe, &info)) { @@ -253,12 +311,15 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) return; } - /* XXX turn off occlusion and streamout queries */ + if (ctx->active_queries) { + ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE); + ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE); + } util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); - /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ + util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs); util_blitter_save_so_targets( ctx->blitter, ctx->num_so_targets, @@ -287,6 +348,11 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) ctx->render_cond_mode); util_blitter_blit(ctx->blitter, &info); + + if (ctx->active_queries) { + ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE); + ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE); + } } @@ -299,14 +365,21 @@ swr_destroy(struct pipe_context *pipe) if (ctx->blitter) util_blitter_destroy(ctx->blitter); - /* Idle core before deleting context */ - SwrWaitForIdle(ctx->swrContext); - for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL); + if (ctx->framebuffer.cbufs[i]) { + struct swr_resource *res = swr_resource(ctx->framebuffer.cbufs[i]->texture); + /* NULL curr_pipe, so we don't have a reference to a deleted pipe */ + res->curr_pipe = NULL; + pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL); + } } - pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL); + if (ctx->framebuffer.zsbuf) { + struct swr_resource *res = swr_resource(ctx->framebuffer.zsbuf->texture); + /* NULL curr_pipe, so we don't have a reference to a deleted pipe */ + res->curr_pipe = NULL; + pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL); + } for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) { pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL); @@ -316,8 +389,15 @@ swr_destroy(struct pipe_context *pipe) pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL); } + if (ctx->pipe.stream_uploader) + u_upload_destroy(ctx->pipe.stream_uploader); + + /* Idle core after destroying buffer resources, but before deleting + * context. Destroying resources has potentially called StoreTiles.*/ + ctx->api.pfnSwrWaitForIdle(ctx->swrContext); + if (ctx->swrContext) - SwrDestroyContext(ctx->swrContext); + ctx->api.pfnSwrDestroyContext(ctx->swrContext); delete ctx->blendJIT; @@ -328,7 +408,7 @@ swr_destroy(struct pipe_context *pipe) if (screen->pipe == pipe) screen->pipe = NULL; - FREE(ctx); + AlignedFree(ctx); } @@ -336,7 +416,7 @@ static void swr_render_condition(struct pipe_context *pipe, struct pipe_query *query, boolean condition, - uint mode) + enum pipe_render_cond_flag mode) { struct swr_context *ctx = swr_context(pipe); @@ -353,7 +433,7 @@ swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats) if (!pDC) return; - struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats; + struct swr_query_result *pqr = pDC->pStats; SWR_STATS *pSwrStats = &pqr->core; @@ -370,7 +450,7 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats) if (!pDC) return; - struct swr_query_result *pqr = (struct swr_query_result *)pDC->pStats; + struct swr_query_result *pqr = pDC->pStats; SWR_STATS_FE *pSwrStats = &pqr->coreFE; p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices); @@ -394,10 +474,18 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats) struct pipe_context * swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { - struct swr_context *ctx = CALLOC_STRUCT(swr_context); + struct swr_context *ctx = (struct swr_context *) + AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES); + memset(ctx, 0, sizeof(struct swr_context)); + + swr_screen(p_screen)->pfnSwrGetInterface(ctx->api); + ctx->swrDC.pAPI = &ctx->api; + ctx->blendJIT = new std::unordered_map; + ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT; + SWR_CREATECONTEXT_INFO createInfo; memset(&createInfo, 0, sizeof(createInfo)); createInfo.privateStateSize = sizeof(swr_draw_context); @@ -406,12 +494,33 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnClearTile = swr_StoreHotTileClear; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; - ctx->swrContext = SwrCreateContext(&createInfo); - /* Init Load/Store/ClearTiles Tables */ - swr_InitMemoryModule(); + SWR_THREADING_INFO threadingInfo {0}; + + threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; + threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; + threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; + threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; + threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; + + // Use non-standard settings for KNL + if (swr_screen(p_screen)->is_knl) + { + if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE")) + threadingInfo.MAX_THREADS_PER_CORE = 2; - InitBackendFuncTables(); + if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT")) + { + ctx->max_draws_in_flight = 2048; + createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight; + } + } + + createInfo.pThreadInfo = &threadingInfo; + + ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo); + + ctx->api.pfnSwrInit(); if (ctx->swrContext == NULL) goto fail; @@ -423,11 +532,12 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) ctx->pipe.surface_destroy = swr_surface_destroy; ctx->pipe.transfer_map = swr_transfer_map; ctx->pipe.transfer_unmap = swr_transfer_unmap; + ctx->pipe.transfer_flush_region = swr_transfer_flush_region; - ctx->pipe.transfer_flush_region = u_default_transfer_flush_region; ctx->pipe.buffer_subdata = u_default_buffer_subdata; ctx->pipe.texture_subdata = u_default_texture_subdata; + ctx->pipe.clear_texture = util_clear_texture; ctx->pipe.resource_copy_region = swr_resource_copy; ctx->pipe.render_condition = swr_render_condition; @@ -436,6 +546,11 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) swr_draw_init(&ctx->pipe); swr_query_init(&ctx->pipe); + ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe); + if (!ctx->pipe.stream_uploader) + goto fail; + ctx->pipe.const_uploader = ctx->pipe.stream_uploader; + ctx->pipe.blit = swr_blit; ctx->blitter = util_blitter_create(&ctx->pipe); if (!ctx->blitter)