From b10cdb217a1638aa7cbd2c7bbb580d180512f3f3 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Thu, 1 Jun 2017 13:08:04 -0500 Subject: [PATCH] swr/rast: Rework attribute layout Move fixed attributes to the top and pack single component SGVs. WIP to support dynamically allocated vertex size. Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/core/binner.cpp | 48 ++++++------ .../drivers/swr/rasterizer/core/frontend.cpp | 18 ++--- .../drivers/swr/rasterizer/core/state.h | 25 +++--- src/gallium/drivers/swr/swr_shader.cpp | 78 ++++++++++++++----- 4 files changed, 103 insertions(+), 66 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index 06078cdcfa1..4f8498d8b97 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -766,9 +766,9 @@ endBinTriangles: if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simdvector vRtai[3]; - pa.Assemble(VERTEX_RTAI_SLOT, vRtai); + pa.Assemble(VERTEX_SGV_SLOT, vRtai); simdscalari vRtaii; - vRtaii = _simd_castps_si(vRtai[0].x); + vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd_store_si((simdscalari*)aRTAI, vRtaii); } else @@ -1114,10 +1114,14 @@ void SIMDAPI BinTriangles_simd16( scisYmax = _simd16_set1_epi32(state.scissorsInFixedPoint[0].ymax); } + // Make triangle bbox inclusive + bbox.xmax = _simd16_sub_epi32(bbox.xmax, _simd16_set1_epi32(1)); + bbox.ymax = _simd16_sub_epi32(bbox.ymax, _simd16_set1_epi32(1)); + bbox.xmin = _simd16_max_epi32(bbox.xmin, scisXmin); bbox.ymin = _simd16_max_epi32(bbox.ymin, scisYmin); - bbox.xmax = _simd16_min_epi32(_simd16_sub_epi32(bbox.xmax, _simd16_set1_epi32(1)), scisXmax); - bbox.ymax = _simd16_min_epi32(_simd16_sub_epi32(bbox.ymax, _simd16_set1_epi32(1)), scisYmax); + bbox.xmax = _simd16_min_epi32(bbox.xmax, scisXmax); + bbox.ymax = _simd16_min_epi32(bbox.ymax, scisYmax); if (CT::IsConservativeT::value) { @@ -1212,9 +1216,9 @@ endBinTriangles: if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simd16vector vRtai[3]; - pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai); + pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai); simd16scalari vRtaii; - vRtaii = _simd16_castps_si(vRtai[0].x); + vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd16_store_si(reinterpret_cast(aRTAI), vRtaii); } else @@ -1422,8 +1426,8 @@ void BinPostSetupPoints( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simdvector vRtai; - pa.Assemble(VERTEX_RTAI_SLOT, &vRtai); - simdscalari vRtaii = _simd_castps_si(vRtai.x); + pa.Assemble(VERTEX_SGV_SLOT, &vRtai); + simdscalari vRtaii = _simd_castps_si(vRtai[VERTEX_SGV_RTAI_COMP]); _simd_store_si((simdscalari*)aRTAI, vRtaii); } else @@ -1496,8 +1500,8 @@ void BinPostSetupPoints( if (rastState.pointParam) { simdvector size[3]; - pa.Assemble(VERTEX_POINT_SIZE_SLOT, size); - vPointSize = size[0].x; + pa.Assemble(VERTEX_SGV_SLOT, size); + vPointSize = size[0][VERTEX_SGV_POINT_SIZE_COMP]; } else { @@ -1562,8 +1566,8 @@ void BinPostSetupPoints( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simdvector vRtai[2]; - pa.Assemble(VERTEX_RTAI_SLOT, vRtai); - simdscalari vRtaii = _simd_castps_si(vRtai[0].x); + pa.Assemble(VERTEX_SGV_SLOT, vRtai); + simdscalari vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd_store_si((simdscalari*)aRTAI, vRtaii); } else @@ -1792,8 +1796,8 @@ void BinPostSetupPoints_simd16( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simd16vector vRtai; - pa.Assemble_simd16(VERTEX_RTAI_SLOT, &vRtai); - simd16scalari vRtaii = _simd16_castps_si(vRtai.x); + pa.Assemble_simd16(VERTEX_SGV_SLOT, &vRtai); + simd16scalari vRtaii = _simd16_castps_si(vRtai[VERTEX_SGV_RTAI_COMP]); _simd16_store_si(reinterpret_cast(aRTAI), vRtaii); } else @@ -1868,8 +1872,8 @@ void BinPostSetupPoints_simd16( if (rastState.pointParam) { simd16vector size[3]; - pa.Assemble_simd16(VERTEX_POINT_SIZE_SLOT, size); - vPointSize = size[0].x; + pa.Assemble_simd16(VERTEX_SGV_SLOT, size); + vPointSize = size[0][VERTEX_SGV_POINT_SIZE_COMP]; } else { @@ -1937,8 +1941,8 @@ void BinPostSetupPoints_simd16( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simd16vector vRtai[2]; - pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai); - simd16scalari vRtaii = _simd16_castps_si(vRtai[0].x); + pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai); + simd16scalari vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd16_store_si(reinterpret_cast(aRTAI), vRtaii); } else @@ -2218,8 +2222,8 @@ void BinPostSetupLines( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simdvector vRtai[2]; - pa.Assemble(VERTEX_RTAI_SLOT, vRtai); - simdscalari vRtaii = _simd_castps_si(vRtai[0].x); + pa.Assemble(VERTEX_SGV_SLOT, vRtai); + simdscalari vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd_store_si((simdscalari*)aRTAI, vRtaii); } else @@ -2435,8 +2439,8 @@ void BinPostSetupLines_simd16( if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex) { simd16vector vRtai[2]; - pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai); - simd16scalari vRtaii = _simd16_castps_si(vRtai[0].x); + pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai); + simd16scalari vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]); _simd16_store_si(reinterpret_cast(aRTAI), vRtaii); } else diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index e2660c1cc90..676a4456575 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -955,14 +955,13 @@ static void GeometryShaderStage( if (state.gsState.emitsViewportArrayIndex) { simd16vector vpiAttrib[3]; - gsPa.Assemble_simd16(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib); + gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib); // OOB indices => forced to zero. + simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - simd16scalari vClearMask = _simd16_cmplt_epi32(_simd16_castps_si(vpiAttrib[0].x), vNumViewports); - vpiAttrib[0].x = _simd16_and_ps(_simd16_castsi_ps(vClearMask), vpiAttrib[0].x); - - vViewPortIdx = _simd16_castps_si(vpiAttrib[0].x); + simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports); + vViewPortIdx = _simd16_and_si(vClearMask, vpai); } else { @@ -979,14 +978,13 @@ static void GeometryShaderStage( if (state.gsState.emitsViewportArrayIndex) { simdvector vpiAttrib[3]; - gsPa.Assemble(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib); + gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib); + simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]); // OOB indices => forced to zero. simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); - simdscalari vClearMask = _simd_cmplt_epi32(_simd_castps_si(vpiAttrib[0].x), vNumViewports); - vpiAttrib[0].x = _simd_and_ps(_simd_castsi_ps(vClearMask), vpiAttrib[0].x); - - vViewPortIdx = _simd_castps_si(vpiAttrib[0].x); + simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports); + vViewPortIdx = _simd_and_si(vClearMask, vpai); } else { diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index f7030511474..4c0c1db412c 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -175,22 +175,21 @@ enum SWR_OUTER_TESSFACTOR_ID ///////////////////////////////////////////////////////////////////////// /// simdvertex /// @brief Defines a vertex element that holds all the data for SIMD vertices. -/// Contains position in clip space, hardcoded to attribute 0, -/// space for up to 32 attributes, as well as any SGV values generated -/// by the pipeline +/// Contains space for position, SGV, and 32 generic attributes ///////////////////////////////////////////////////////////////////////// enum SWR_VTX_SLOTS { - VERTEX_POSITION_SLOT = 0, - VERTEX_POSITION_END_SLOT = 0, - VERTEX_ATTRIB_START_SLOT = ( 1 + VERTEX_POSITION_END_SLOT), - VERTEX_ATTRIB_END_SLOT = (32 + VERTEX_POSITION_END_SLOT), - VERTEX_RTAI_SLOT = (33 + VERTEX_POSITION_END_SLOT), // GS writes RenderTargetArrayIndex here - VERTEX_CLIPCULL_DIST_LO_SLOT = (34 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist - VERTEX_CLIPCULL_DIST_HI_SLOT = (35 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist - VERTEX_POINT_SIZE_SLOT = (36 + VERTEX_POSITION_END_SLOT), // VS writes point size here - VERTEX_VIEWPORT_ARRAY_INDEX_SLOT = (37 + VERTEX_POSITION_END_SLOT), - SWR_VTX_NUM_SLOTS = VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, + VERTEX_SGV_SLOT = 0, + VERTEX_SGV_RTAI_COMP = 0, + VERTEX_SGV_VAI_COMP = 1, + VERTEX_SGV_POINT_SIZE_COMP = 2, + VERTEX_POSITION_SLOT = 1, + VERTEX_POSITION_END_SLOT = 1, + VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist + VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist + VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT), + VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT), + SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT) }; // SoAoSoA diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 44988892691..2a772939e2d 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -404,10 +404,18 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) { uint32_t attribSlot = attrib; - if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) - attribSlot = VERTEX_POINT_SIZE_SLOT; - else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) - attribSlot = VERTEX_RTAI_SLOT; + uint32_t sgvChannel = 0; + if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { + attribSlot = VERTEX_SGV_SLOT; + sgvChannel = VERTEX_SGV_POINT_SIZE_COMP; + } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) { + attribSlot = VERTEX_SGV_SLOT; + sgvChannel = VERTEX_SGV_RTAI_COMP; + } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { + attribSlot = VERTEX_POSITION_SLOT; + } else { + attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1; + } #if USE_SIMD16_FRONTEND Value *vOffsetsAttrib = @@ -424,13 +432,21 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float)))); for (uint32_t channel = 0; channel < 4; ++channel) { - Value *vData = LOAD(unwrap(outputs[attrib][channel])); Value *vPtrs = GEP(pStream, vOffsetsAttrib); + Value *vData; + + if (attribSlot == VERTEX_SGV_SLOT) + vData = LOAD(unwrap(outputs[attrib][0])); + else + vData = LOAD(unwrap(outputs[attrib][channel])); - vPtrs = BITCAST(vPtrs, - VectorType::get(PointerType::get(mFP32Ty, 0), 8)); + if (attribSlot != VERTEX_SGV_SLOT || + sgvChannel == channel) { + vPtrs = BITCAST(vPtrs, + VectorType::get(PointerType::get(mFP32Ty, 0), 8)); - MASKED_SCATTER(vData, vPtrs, 32, vMask1); + MASKED_SCATTER(vData, vPtrs, 32, vMask1); + } #if USE_SIMD16_FRONTEND vOffsetsAttrib = @@ -597,8 +613,15 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) ubyte semantic_name = info->input_semantic_name[slot]; ubyte semantic_idx = info->input_semantic_index[slot]; - unsigned vs_slot = - locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base) + 1; + unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); + + vs_slot += VERTEX_ATTRIB_START_SLOT; + + if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) + vs_slot--; + + if (semantic_name == TGSI_SEMANTIC_POSITION) + vs_slot = VERTEX_POSITION_SLOT; STORE(C(vs_slot), vtxAttribMap, {0, slot}); mapConstants.push_back(C(vs_slot)); @@ -789,11 +812,24 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) if (!outputs[attrib][channel]) continue; - Value *val = LOAD(unwrap(outputs[attrib][channel])); + Value *val; + uint32_t outSlot; + + if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) { + if (channel != VERTEX_SGV_POINT_SIZE_COMP) + continue; + val = LOAD(unwrap(outputs[attrib][0])); + outSlot = VERTEX_SGV_SLOT; + } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) { + val = LOAD(unwrap(outputs[attrib][channel])); + outSlot = VERTEX_POSITION_SLOT; + } else { + val = LOAD(unwrap(outputs[attrib][channel])); + outSlot = VERTEX_ATTRIB_START_SLOT + attrib; + if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) + outSlot--; + } - uint32_t outSlot = attrib; - if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) - outSlot = VERTEX_POINT_SIZE_SLOT; WriteVS(val, pVsCtx, vtxOutput, outSlot, channel); } } @@ -804,8 +840,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) unsigned cv = 0; if (swr_vs->info.base.writes_clipvertex) { - cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, - &swr_vs->info.base); + cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, + &swr_vs->info.base); } else { for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && @@ -824,8 +860,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) // clip distance overrides user clip planes if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) || ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) { - unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, - &swr_vs->info.base); + unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, + &swr_vs->info.base); if (val < 4) { LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val); @@ -894,7 +930,7 @@ locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { if ((info->output_semantic_name[i] == name) && (info->output_semantic_index[i] == index)) { - return i - 1; // position is not part of the linkage + return i; } } @@ -1043,7 +1079,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) } unsigned linkedAttrib = - locate_linkage(semantic_name, semantic_idx, pPrevShader); + locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1; uint32_t extraAttribs = 0; if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) { @@ -1079,7 +1115,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) Value *offset = NULL; if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { bcolorAttrib = locate_linkage( - TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader); + TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader) - 1; /* Neither front nor back colors were available. Nothing to load. */ if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) continue; -- 2.30.2