From b19d214b238228bfebfe3869b6aee540993fe706 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Wed, 30 Mar 2016 22:40:25 -0500 Subject: [PATCH] swr: support samplers in vertex shaders Reviewed-by: George Kyriazis --- src/gallium/drivers/swr/swr_shader.cpp | 98 ++++++++----- src/gallium/drivers/swr/swr_shader.h | 41 ++++-- src/gallium/drivers/swr/swr_state.cpp | 153 ++++++++++++++------- src/gallium/drivers/swr/swr_state.h | 6 +- src/gallium/drivers/swr/swr_tex_sample.cpp | 33 ++++- src/gallium/drivers/swr/swr_tex_sample.h | 2 +- 6 files changed, 228 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index ff16d0f2f11..90f0f22a6a2 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -40,32 +40,29 @@ #include "swr_state.h" #include "swr_screen.h" -bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs) +bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) { return !memcmp(&lhs, &rhs, sizeof(lhs)); } -void -swr_generate_fs_key(struct swr_jit_key &key, - struct swr_context *ctx, - swr_fragment_shader *swr_fs) +bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) { - key.nr_cbufs = ctx->framebuffer.nr_cbufs; - key.light_twoside = ctx->rasterizer->light_twoside; - memcpy(&key.vs_output_semantic_name, - &ctx->vs->info.base.output_semantic_name, - sizeof(key.vs_output_semantic_name)); - memcpy(&key.vs_output_semantic_idx, - &ctx->vs->info.base.output_semantic_index, - sizeof(key.vs_output_semantic_idx)); + return !memcmp(&lhs, &rhs, sizeof(lhs)); +} - key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1; +static void +swr_generate_sampler_key(const struct lp_tgsi_info &info, + struct swr_context *ctx, + unsigned shader_type, + struct swr_jit_sampler_key &key) +{ + key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; for (unsigned i = 0; i < key.nr_samplers; i++) { - if 
(swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_sampler_state( &key.sampler[i].sampler_state, - ctx->samplers[PIPE_SHADER_FRAGMENT][i]); + ctx->samplers[shader_type][i]); } } @@ -74,28 +71,57 @@ swr_generate_fs_key(struct swr_jit_key &key, * are dx10-style? Can't really have mixed opcodes, at least not * if we want to skip the holes here (without rescanning tgsi). */ - if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { key.nr_sampler_views = - swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; for (unsigned i = 0; i < key.nr_sampler_views; i++) { - if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { lp_sampler_static_texture_state( &key.sampler[i].texture_state, - ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]); + ctx->sampler_views[shader_type][i]); } } } else { key.nr_sampler_views = key.nr_samplers; for (unsigned i = 0; i < key.nr_sampler_views; i++) { - if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_texture_state( &key.sampler[i].texture_state, - ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]); + ctx->sampler_views[shader_type][i]); } } } } +void +swr_generate_fs_key(struct swr_jit_fs_key &key, + struct swr_context *ctx, + swr_fragment_shader *swr_fs) +{ + memset(&key, 0, sizeof(key)); + + key.nr_cbufs = ctx->framebuffer.nr_cbufs; + key.light_twoside = ctx->rasterizer->light_twoside; + memcpy(&key.vs_output_semantic_name, + &ctx->vs->info.base.output_semantic_name, + sizeof(key.vs_output_semantic_name)); + memcpy(&key.vs_output_semantic_idx, + &ctx->vs->info.base.output_semantic_index, + sizeof(key.vs_output_semantic_idx)); + + swr_generate_sampler_key(swr_fs->info, ctx, 
PIPE_SHADER_FRAGMENT, key); +} + +void +swr_generate_vs_key(struct swr_jit_vs_key &key, + struct swr_context *ctx, + swr_vertex_shader *swr_vs) +{ + memset(&key, 0, sizeof(key)); + + swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); +} + struct BuilderSWR : public Builder { BuilderSWR(JitManager *pJitMgr) : Builder(pJitMgr) @@ -103,14 +129,15 @@ struct BuilderSWR : public Builder { pJitMgr->SetupNewModule(); } - PFN_VERTEX_FUNC - CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs); - PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key); + PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); + PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); }; PFN_VERTEX_FUNC -BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) { + struct swr_vertex_shader *swr_vs = ctx->vs; + swr_vs->linkageMask = 0; for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) { @@ -180,6 +207,9 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) } } + struct lp_build_sampler_soa *sampler = + swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); + struct lp_bld_tgsi_system_values system_values; memset(&system_values, 0, sizeof(system_values)); system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); @@ -194,9 +224,9 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) &system_values, inputs, outputs, - NULL, // wrap(hPrivateData), (sampler context) + wrap(hPrivateData), // (sampler context) NULL, // thread data - NULL, // sampler + sampler, // sampler &swr_vs->info.base, NULL); // geometry shader face @@ -239,11 +269,11 @@ BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) } PFN_VERTEX_FUNC -swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key 
&key) { BuilderSWR builder( - reinterpret_cast<JitManager *>(swr_screen(ctx->screen)->hJitMgr)); - return builder.CompileVS(ctx, swr_vs); + reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr)); + return builder.CompileVS(ctx, key); } static unsigned @@ -269,7 +299,7 @@ locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) } PFN_PIXEL_KERNEL -BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key) +BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) { struct swr_fragment_shader *swr_fs = ctx->fs; @@ -478,7 +508,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key) } } - sampler = swr_sampler_soa_create(key.sampler); + sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); struct lp_bld_tgsi_system_values system_values; memset(&system_values, 0, sizeof(system_values)); @@ -583,7 +613,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key) } PFN_PIXEL_KERNEL -swr_compile_fs(struct swr_context *ctx, swr_jit_key &key) +swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) { BuilderSWR builder( reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr)); diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h index e22a7c48c2a..4814b9d914c 100644 --- a/src/gallium/drivers/swr/swr_shader.h +++ b/src/gallium/drivers/swr/swr_shader.h @@ -25,36 +25,55 @@ class swr_vertex_shader; class swr_fragment_shader; -class swr_jit_key; +class swr_jit_fs_key; +class swr_jit_vs_key; PFN_VERTEX_FUNC -swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs); +swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key); PFN_PIXEL_KERNEL -swr_compile_fs(struct swr_context *ctx, swr_jit_key &key); +swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key); -void swr_generate_fs_key(struct swr_jit_key &key, +void swr_generate_fs_key(struct swr_jit_fs_key &key, struct swr_context *ctx, swr_fragment_shader *swr_fs); -struct swr_jit_key { +void swr_generate_vs_key(struct
swr_jit_vs_key &key, + struct swr_context *ctx, + swr_vertex_shader *swr_vs); + +struct swr_jit_sampler_key { + unsigned nr_samplers; + unsigned nr_sampler_views; + struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS]; +}; + +struct swr_jit_fs_key : swr_jit_sampler_key { unsigned nr_cbufs; unsigned light_twoside; ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; - unsigned nr_samplers; - unsigned nr_sampler_views; - struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS]; +}; + +struct swr_jit_vs_key : swr_jit_sampler_key { }; namespace std { -template <> struct hash<swr_jit_key> { - std::size_t operator()(const swr_jit_key &k) const +template <> struct hash<swr_jit_fs_key> { + std::size_t operator()(const swr_jit_fs_key &k) const + { + return util_hash_crc32(&k, sizeof(k)); + } +}; + +template <> struct hash<swr_jit_vs_key> { + std::size_t operator()(const swr_jit_vs_key &k) const { return util_hash_crc32(&k, sizeof(k)); } }; }; -bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs); +bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs); +bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index e7bf3618a7d..ded51a9b196 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -317,8 +317,7 @@ static void * swr_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *vs) { - struct swr_vertex_shader *swr_vs = - (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader); + struct swr_vertex_shader *swr_vs = new swr_vertex_shader; if (!swr_vs) return NULL; @@ -327,8 +326,6 @@ swr_create_vs_state(struct pipe_context *pipe, lp_build_tgsi_info(vs->tokens, &swr_vs->info); - swr_vs->func = swr_compile_vs(pipe, swr_vs); - swr_vs->soState = {0}; if (swr_vs->pipe.stream_output.num_outputs) { @@ -368,7 +365,7 @@ swr_delete_vs_state(struct
pipe_context *pipe, void *vs) { struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs; FREE((void *)swr_vs->pipe.tokens); - FREE(vs); + delete swr_vs; } static void * @@ -675,6 +672,58 @@ swr_update_resource_status(struct pipe_context *pipe, } } +static void +swr_update_texture_state(struct swr_context *ctx, + unsigned shader_type, + unsigned num_sampler_views, + swr_jit_texture *textures) +{ + for (unsigned i = 0; i < num_sampler_views; i++) { + struct pipe_sampler_view *view = + ctx->sampler_views[shader_type][i]; + + if (view) { + struct pipe_resource *res = view->texture; + struct swr_resource *swr_res = swr_resource(res); + struct swr_jit_texture *jit_tex = &textures[i]; + memset(jit_tex, 0, sizeof(*jit_tex)); + jit_tex->width = res->width0; + jit_tex->height = res->height0; + jit_tex->depth = res->depth0; + jit_tex->first_level = view->u.tex.first_level; + jit_tex->last_level = view->u.tex.last_level; + jit_tex->base_ptr = swr_res->swr.pBaseAddress; + + for (unsigned level = jit_tex->first_level; + level <= jit_tex->last_level; + level++) { + jit_tex->row_stride[level] = swr_res->row_stride[level]; + jit_tex->img_stride[level] = swr_res->img_stride[level]; + jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; + } + } + } +} + +static void +swr_update_sampler_state(struct swr_context *ctx, + unsigned shader_type, + unsigned num_samplers, + swr_jit_sampler *samplers) +{ + for (unsigned i = 0; i < num_samplers; i++) { + const struct pipe_sampler_state *sampler = + ctx->samplers[shader_type][i]; + + if (sampler) { + samplers[i].min_lod = sampler->min_lod; + samplers[i].max_lod = sampler->max_lod; + samplers[i].lod_bias = sampler->lod_bias; + COPY_4V(samplers[i].border_color, sampler->border_color.f); + } + } +} + void swr_update_derived(struct pipe_context *pipe, const struct pipe_draw_info *p_draw_info) @@ -974,14 +1023,43 @@ swr_update_derived(struct pipe_context *pipe, } /* VertexShader */ - if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) { - 
SwrSetVertexFunc(ctx->swrContext, ctx->vs->func); + if (ctx->dirty & (SWR_NEW_VS | + SWR_NEW_SAMPLER | + SWR_NEW_SAMPLER_VIEW | + SWR_NEW_FRAMEBUFFER)) { + swr_jit_vs_key key; + swr_generate_vs_key(key, ctx, ctx->vs); + auto search = ctx->vs->map.find(key); + PFN_VERTEX_FUNC func; + if (search != ctx->vs->map.end()) { + func = search->second; + } else { + func = swr_compile_vs(ctx, key); + ctx->vs->map.insert(std::make_pair(key, func)); + } + SwrSetVertexFunc(ctx->swrContext, func); + + /* JIT sampler state */ + if (ctx->dirty & SWR_NEW_SAMPLER) { + swr_update_sampler_state(ctx, + PIPE_SHADER_VERTEX, + key.nr_samplers, + ctx->swrDC.samplersVS); + } + + /* JIT sampler view state */ + if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { + swr_update_texture_state(ctx, + PIPE_SHADER_VERTEX, + key.nr_sampler_views, + ctx->swrDC.texturesVS); + } } - swr_jit_key key; + /* FragmentShader */ if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) { - memset(&key, 0, sizeof(key)); + swr_jit_fs_key key; swr_generate_fs_key(key, ctx, ctx->fs); auto search = ctx->fs->map.find(key); PFN_PIXEL_KERNEL func; @@ -1031,56 +1109,25 @@ swr_update_derived(struct pipe_context *pipe, psState.usesUAV = false; // XXX psState.forceEarlyZ = false; SwrSetPixelShaderState(ctx->swrContext, &psState); - } - - /* JIT sampler state */ - if (ctx->dirty & SWR_NEW_SAMPLER) { - swr_draw_context *pDC = &ctx->swrDC; - for (unsigned i = 0; i < key.nr_samplers; i++) { - const struct pipe_sampler_state *sampler = - ctx->samplers[PIPE_SHADER_FRAGMENT][i]; - - if (sampler) { - pDC->samplersFS[i].min_lod = sampler->min_lod; - pDC->samplersFS[i].max_lod = sampler->max_lod; - pDC->samplersFS[i].lod_bias = sampler->lod_bias; - COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f); - } + /* JIT sampler state */ + if (ctx->dirty & SWR_NEW_SAMPLER) { + swr_update_sampler_state(ctx, + PIPE_SHADER_FRAGMENT, + key.nr_samplers, + 
ctx->swrDC.samplersFS); } - } - - /* JIT sampler view state */ - if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { - swr_draw_context *pDC = &ctx->swrDC; - for (unsigned i = 0; i < key.nr_sampler_views; i++) { - struct pipe_sampler_view *view = - ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]; - - if (view) { - struct pipe_resource *res = view->texture; - struct swr_resource *swr_res = swr_resource(res); - struct swr_jit_texture *jit_tex = &pDC->texturesFS[i]; - memset(jit_tex, 0, sizeof(*jit_tex)); - jit_tex->width = res->width0; - jit_tex->height = res->height0; - jit_tex->depth = res->depth0; - jit_tex->first_level = view->u.tex.first_level; - jit_tex->last_level = view->u.tex.last_level; - jit_tex->base_ptr = swr_res->swr.pBaseAddress; - - for (unsigned level = jit_tex->first_level; - level <= jit_tex->last_level; - level++) { - jit_tex->row_stride[level] = swr_res->row_stride[level]; - jit_tex->img_stride[level] = swr_res->img_stride[level]; - jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; - } - } + /* JIT sampler view state */ + if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { + swr_update_texture_state(ctx, + PIPE_SHADER_FRAGMENT, + key.nr_sampler_views, + ctx->swrDC.texturesFS); } } + /* VertexShader Constants */ if (ctx->dirty & SWR_NEW_VSCONSTANTS) { swr_draw_context *pDC = &ctx->swrDC; diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h index f0a7ff3b185..32a5441295b 100644 --- a/src/gallium/drivers/swr/swr_state.h +++ b/src/gallium/drivers/swr/swr_state.h @@ -40,9 +40,9 @@ struct swr_vertex_shader { struct pipe_shader_state pipe; struct lp_tgsi_info info; unsigned linkageMask; - PFN_VERTEX_FUNC func; + std::unordered_map<swr_jit_vs_key, PFN_VERTEX_FUNC> map; SWR_STREAMOUT_STATE soState; - PFN_SO_FUNC soFunc[PIPE_PRIM_MAX]; + PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0}; }; struct swr_fragment_shader { @@ -50,7 +50,7 @@ struct swr_fragment_shader { struct lp_tgsi_info info; uint32_t constantMask; uint32_t
pointSpriteMask; - std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map; + std::unordered_map<swr_jit_fs_key, PFN_PIXEL_KERNEL> map; }; /* Vertex element state */ diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp index 8e01e32e280..8172c820f22 100644 --- a/src/gallium/drivers/swr/swr_tex_sample.cpp +++ b/src/gallium/drivers/swr/swr_tex_sample.cpp @@ -72,6 +72,8 @@ struct swr_sampler_dynamic_state { struct lp_sampler_dynamic_state base; const struct swr_sampler_static_state *static_state; + + unsigned shader_type; }; @@ -112,7 +114,18 @@ swr_texture_member(const struct lp_sampler_dynamic_state *base, /* context[0] */ indices[0] = lp_build_const_int32(gallivm, 0); /* context[0].textures */ - indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS); + auto dynamic = (const struct swr_sampler_dynamic_state *)base; + switch (dynamic->shader_type) { + case PIPE_SHADER_FRAGMENT: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS); + break; + case PIPE_SHADER_VERTEX: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesVS); + break; + default: + assert(0 && "unsupported shader type"); + break; + } /* context[0].textures[unit] */ indices[2] = lp_build_const_int32(gallivm, texture_unit); /* context[0].textures[unit].member */ @@ -195,7 +208,18 @@ swr_sampler_member(const struct lp_sampler_dynamic_state *base, /* context[0] */ indices[0] = lp_build_const_int32(gallivm, 0); /* context[0].samplers */ - indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS); + auto dynamic = (const struct swr_sampler_dynamic_state *)base; + switch (dynamic->shader_type) { + case PIPE_SHADER_FRAGMENT: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS); + break; + case PIPE_SHADER_VERTEX: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersVS); + break; + default: + assert(0 && "unsupported shader type"); + break; + } /* context[0].samplers[unit] */ indices[2] = lp_build_const_int32(gallivm,
sampler_unit); /* context[0].samplers[unit].member */ @@ -307,7 +331,8 @@ swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, struct lp_build_sampler_soa * -swr_sampler_soa_create(const struct swr_sampler_static_state *static_state) +swr_sampler_soa_create(const struct swr_sampler_static_state *static_state, + unsigned shader_type) { struct swr_sampler_soa *sampler; @@ -334,5 +359,7 @@ swr_sampler_soa_create(const struct swr_sampler_static_state *static_state) sampler->dynamic_state.static_state = static_state; + sampler->dynamic_state.shader_type = shader_type; + return &sampler->base; } diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h index f5c368c108d..cb7e83d1c39 100644 --- a/src/gallium/drivers/swr/swr_tex_sample.h +++ b/src/gallium/drivers/swr/swr_tex_sample.h @@ -44,4 +44,4 @@ struct swr_sampler_static_state { * */ struct lp_build_sampler_soa * -swr_sampler_soa_create(const struct swr_sampler_static_state *key); +swr_sampler_soa_create(const struct swr_sampler_static_state *key, unsigned shader_type); -- 2.30.2