Move fixed attributes to the top of the vertex and pack the single-component SGVs into one slot.
WIP step toward supporting a dynamically allocated vertex size.
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
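
For illustration, the access-pattern change this makes for consumers of
single-component SGVs (a sketch using the names introduced below; not part
of the patch):

    // Before: one vertex slot per SGV, value always read from .x
    simdvector v[3];
    pa.Assemble(VERTEX_RTAI_SLOT, v);
    simdscalari rtai = _simd_castps_si(v[0].x);

    // After: single-component SGVs share VERTEX_SGV_SLOT, one component each
    pa.Assemble(VERTEX_SGV_SLOT, v);
    simdscalari rtai = _simd_castps_si(v[0][VERTEX_SGV_RTAI_COMP]);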
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simdvector vRtai[3];
- pa.Assemble(VERTEX_RTAI_SLOT, vRtai);
+ pa.Assemble(VERTEX_SGV_SLOT, vRtai);
simdscalari vRtaii;
- vRtaii = _simd_castps_si(vRtai[0].x);
+ vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd_store_si((simdscalari*)aRTAI, vRtaii);
}
else
scisYmax = _simd16_set1_epi32(state.scissorsInFixedPoint[0].ymax);
}
+ // Make triangle bbox inclusive
+ bbox.xmax = _simd16_sub_epi32(bbox.xmax, _simd16_set1_epi32(1));
+ bbox.ymax = _simd16_sub_epi32(bbox.ymax, _simd16_set1_epi32(1));
+
bbox.xmin = _simd16_max_epi32(bbox.xmin, scisXmin);
bbox.ymin = _simd16_max_epi32(bbox.ymin, scisYmin);
- bbox.xmax = _simd16_min_epi32(_simd16_sub_epi32(bbox.xmax, _simd16_set1_epi32(1)), scisXmax);
- bbox.ymax = _simd16_min_epi32(_simd16_sub_epi32(bbox.ymax, _simd16_set1_epi32(1)), scisYmax);
+ bbox.xmax = _simd16_min_epi32(bbox.xmax, scisXmax);
+ bbox.ymax = _simd16_min_epi32(bbox.ymax, scisYmax);
if (CT::IsConservativeT::value)
{
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simd16vector vRtai[3];
- pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai);
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai);
simd16scalari vRtaii;
- vRtaii = _simd16_castps_si(vRtai[0].x);
+ vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aRTAI), vRtaii);
}
else
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simdvector vRtai;
- pa.Assemble(VERTEX_RTAI_SLOT, &vRtai);
- simdscalari vRtaii = _simd_castps_si(vRtai.x);
+ pa.Assemble(VERTEX_SGV_SLOT, &vRtai);
+ simdscalari vRtaii = _simd_castps_si(vRtai[VERTEX_SGV_RTAI_COMP]);
_simd_store_si((simdscalari*)aRTAI, vRtaii);
}
else
if (rastState.pointParam)
{
simdvector size[3];
- pa.Assemble(VERTEX_POINT_SIZE_SLOT, size);
- vPointSize = size[0].x;
+ pa.Assemble(VERTEX_SGV_SLOT, size);
+ vPointSize = size[0][VERTEX_SGV_POINT_SIZE_COMP];
}
else
{
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simdvector vRtai[2];
- pa.Assemble(VERTEX_RTAI_SLOT, vRtai);
- simdscalari vRtaii = _simd_castps_si(vRtai[0].x);
+ pa.Assemble(VERTEX_SGV_SLOT, vRtai);
+ simdscalari vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd_store_si((simdscalari*)aRTAI, vRtaii);
}
else
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simd16vector vRtai;
- pa.Assemble_simd16(VERTEX_RTAI_SLOT, &vRtai);
- simd16scalari vRtaii = _simd16_castps_si(vRtai.x);
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, &vRtai);
+ simd16scalari vRtaii = _simd16_castps_si(vRtai[VERTEX_SGV_RTAI_COMP]);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aRTAI), vRtaii);
}
else
if (rastState.pointParam)
{
simd16vector size[3];
- pa.Assemble_simd16(VERTEX_POINT_SIZE_SLOT, size);
- vPointSize = size[0].x;
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, size);
+ vPointSize = size[0][VERTEX_SGV_POINT_SIZE_COMP];
}
else
{
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simd16vector vRtai[2];
- pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai);
- simd16scalari vRtaii = _simd16_castps_si(vRtai[0].x);
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai);
+ simd16scalari vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aRTAI), vRtaii);
}
else
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simdvector vRtai[2];
- pa.Assemble(VERTEX_RTAI_SLOT, vRtai);
- simdscalari vRtaii = _simd_castps_si(vRtai[0].x);
+ pa.Assemble(VERTEX_SGV_SLOT, vRtai);
+ simdscalari vRtaii = _simd_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd_store_si((simdscalari*)aRTAI, vRtaii);
}
else
if (gsState.gsEnable && gsState.emitsRenderTargetArrayIndex)
{
simd16vector vRtai[2];
- pa.Assemble_simd16(VERTEX_RTAI_SLOT, vRtai);
- simd16scalari vRtaii = _simd16_castps_si(vRtai[0].x);
+ pa.Assemble_simd16(VERTEX_SGV_SLOT, vRtai);
+ simd16scalari vRtaii = _simd16_castps_si(vRtai[0][VERTEX_SGV_RTAI_COMP]);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aRTAI), vRtaii);
}
else
if (state.gsState.emitsViewportArrayIndex)
{
simd16vector vpiAttrib[3];
- gsPa.Assemble_simd16(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib);
+ gsPa.Assemble_simd16(VERTEX_SGV_SLOT, vpiAttrib);
// OOB indices => forced to zero.
+ simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = _simd16_cmplt_epi32(_simd16_castps_si(vpiAttrib[0].x), vNumViewports);
- vpiAttrib[0].x = _simd16_and_ps(_simd16_castsi_ps(vClearMask), vpiAttrib[0].x);
-
- vViewPortIdx = _simd16_castps_si(vpiAttrib[0].x);
+ simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
+ vViewPortIdx = _simd16_and_si(vClearMask, vpai);
}
else
{
if (state.gsState.emitsViewportArrayIndex)
{
simdvector vpiAttrib[3];
- gsPa.Assemble(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib);
+ gsPa.Assemble(VERTEX_SGV_SLOT, vpiAttrib);
+ simdscalari vpai = _simd_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB indices => forced to zero.
simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = _simd_cmplt_epi32(_simd_castps_si(vpiAttrib[0].x), vNumViewports);
- vpiAttrib[0].x = _simd_and_ps(_simd_castsi_ps(vClearMask), vpiAttrib[0].x);
-
- vViewPortIdx = _simd_castps_si(vpiAttrib[0].x);
+ simdscalari vClearMask = _simd_cmplt_epi32(vpai, vNumViewports);
+ vViewPortIdx = _simd_and_si(vClearMask, vpai);
}
else
{
/////////////////////////////////////////////////////////////////////////
/// simdvertex
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
-/// Contains position in clip space, hardcoded to attribute 0,
-/// space for up to 32 attributes, as well as any SGV values generated
-/// by the pipeline
+/// Contains the packed SGV components, position, clip/cull distances,
+/// and space for 32 generic attributes
/////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
- VERTEX_POSITION_SLOT = 0,
- VERTEX_POSITION_END_SLOT = 0,
- VERTEX_ATTRIB_START_SLOT = ( 1 + VERTEX_POSITION_END_SLOT),
- VERTEX_ATTRIB_END_SLOT = (32 + VERTEX_POSITION_END_SLOT),
- VERTEX_RTAI_SLOT = (33 + VERTEX_POSITION_END_SLOT), // GS writes RenderTargetArrayIndex here
- VERTEX_CLIPCULL_DIST_LO_SLOT = (34 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
- VERTEX_CLIPCULL_DIST_HI_SLOT = (35 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
- VERTEX_POINT_SIZE_SLOT = (36 + VERTEX_POSITION_END_SLOT), // VS writes point size here
- VERTEX_VIEWPORT_ARRAY_INDEX_SLOT = (37 + VERTEX_POSITION_END_SLOT),
- SWR_VTX_NUM_SLOTS = VERTEX_VIEWPORT_ARRAY_INDEX_SLOT,
+ VERTEX_SGV_SLOT = 0,
+ VERTEX_SGV_RTAI_COMP = 0,
+ VERTEX_SGV_VAI_COMP = 1,
+ VERTEX_SGV_POINT_SIZE_COMP = 2,
+ VERTEX_POSITION_SLOT = 1,
+ VERTEX_POSITION_END_SLOT = 1,
+ VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
+ VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
+ VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT),
+ VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT),
+ SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT)
};
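
The resulting fixed layout can be sanity-checked at compile time; a minimal
sketch assuming the enum values above (not part of the patch):

    static_assert(VERTEX_SGV_SLOT == 0, "packed SGVs live in slot 0");
    static_assert(VERTEX_POSITION_SLOT == 1, "position directly follows the SGVs");
    static_assert(VERTEX_CLIPCULL_DIST_LO_SLOT == 2, "fixed clip/cull lo slot");
    static_assert(VERTEX_CLIPCULL_DIST_HI_SLOT == 3, "fixed clip/cull hi slot");
    static_assert(VERTEX_ATTRIB_START_SLOT == 4, "generics start after the fixed slots");
    static_assert(SWR_VTX_NUM_SLOTS == 36, "4 fixed slots + 32 generic attributes");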
// SoAoSoA
for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
uint32_t attribSlot = attrib;
- if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
- attribSlot = VERTEX_POINT_SIZE_SLOT;
- else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
- attribSlot = VERTEX_RTAI_SLOT;
+ uint32_t sgvChannel = 0;
+ if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
+ attribSlot = VERTEX_SGV_SLOT;
+ sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
+ } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
+ attribSlot = VERTEX_SGV_SLOT;
+ sgvChannel = VERTEX_SGV_RTAI_COMP;
+ } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
+ attribSlot = VERTEX_POSITION_SLOT;
+ } else {
+ attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
+ }
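
The semantic-to-slot mapping above could equally be factored into a small
helper; a sketch under the same assumptions (the helper name is hypothetical,
and position is assumed to occupy output 0):

    // Hypothetical helper mirroring the mapping above; not part of the patch.
    static void
    swr_map_gs_output(unsigned semantic_name, uint32_t attrib,
                      uint32_t *slot, uint32_t *comp)
    {
       *comp = 0;
       if (semantic_name == TGSI_SEMANTIC_PSIZE) {
          *slot = VERTEX_SGV_SLOT;
          *comp = VERTEX_SGV_POINT_SIZE_COMP;
       } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
          *slot = VERTEX_SGV_SLOT;
          *comp = VERTEX_SGV_RTAI_COMP;
       } else if (semantic_name == TGSI_SEMANTIC_POSITION) {
          *slot = VERTEX_POSITION_SLOT;
       } else {
          // Generic outputs pack after the fixed slots; -1 because output 0
          // (position) does not consume a generic slot.
          *slot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
       }
    }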
#if USE_SIMD16_FRONTEND
Value *vOffsetsAttrib =
ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
for (uint32_t channel = 0; channel < 4; ++channel) {
- Value *vData = LOAD(unwrap(outputs[attrib][channel]));
Value *vPtrs = GEP(pStream, vOffsetsAttrib);
- vPtrs = BITCAST(vPtrs,
-                 VectorType::get(PointerType::get(mFP32Ty, 0), 8));
- MASKED_SCATTER(vData, vPtrs, 32, vMask1);
+ Value *vData;
+
+ if (attribSlot == VERTEX_SGV_SLOT)
+     vData = LOAD(unwrap(outputs[attrib][0]));
+ else
+     vData = LOAD(unwrap(outputs[attrib][channel]));
+
+ // Scatter only the SGV's own component when writing the packed SGV slot.
+ if (attribSlot != VERTEX_SGV_SLOT ||
+     sgvChannel == channel) {
+     vPtrs = BITCAST(vPtrs,
+                     VectorType::get(PointerType::get(mFP32Ty, 0), 8));
+     MASKED_SCATTER(vData, vPtrs, 32, vMask1);
+ }
#if USE_SIMD16_FRONTEND
vOffsetsAttrib =
ubyte semantic_name = info->input_semantic_name[slot];
ubyte semantic_idx = info->input_semantic_index[slot];
- unsigned vs_slot =
- locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base) + 1;
+ unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+
+ vs_slot += VERTEX_ATTRIB_START_SLOT;
+
+ if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
+ vs_slot--;
+
+ if (semantic_name == TGSI_SEMANTIC_POSITION)
+ vs_slot = VERTEX_POSITION_SLOT;
STORE(C(vs_slot), vtxAttribMap, {0, slot});
mapConstants.push_back(C(vs_slot));
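
A worked example of the remapping above, assuming a VS whose outputs are
{ POSITION, COLOR } (illustrative only, not part of the patch):

    // COLOR:    locate_linkage() -> 1 (raw output index, position included)
    //           1 + VERTEX_ATTRIB_START_SLOT = 5
    //           output 0 is POSITION, so vs_slot-- -> 4 (first generic slot)
    // POSITION: overridden to VERTEX_POSITION_SLOT (slot 1)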
if (!outputs[attrib][channel])
continue;
- Value *val = LOAD(unwrap(outputs[attrib][channel]));
+ Value *val;
+ uint32_t outSlot;
+
+ if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
+ if (channel != VERTEX_SGV_POINT_SIZE_COMP)
+ continue;
+ val = LOAD(unwrap(outputs[attrib][0]));
+ outSlot = VERTEX_SGV_SLOT;
+ } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
+ val = LOAD(unwrap(outputs[attrib][channel]));
+ outSlot = VERTEX_POSITION_SLOT;
+ } else {
+ val = LOAD(unwrap(outputs[attrib][channel]));
+ outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
+ if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
+ outSlot--;
+ }
- uint32_t outSlot = attrib;
- if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
- outSlot = VERTEX_POINT_SIZE_SLOT;
WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
}
}
unsigned cv = 0;
if (swr_vs->info.base.writes_clipvertex) {
- cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
- &swr_vs->info.base);
+ cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
+ &swr_vs->info.base);
} else {
for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
// clip distance overrides user clip planes
if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
- unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
- &swr_vs->info.base);
+ unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
+ &swr_vs->info.base);
if (val < 4) {
LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
if ((info->output_semantic_name[i] == name)
&& (info->output_semantic_index[i] == index)) {
- return i - 1; // position is not part of the linkage
+ return i;
}
}
}
unsigned linkedAttrib =
locate_linkage(semantic_name, semantic_idx, pPrevShader);
+      if (linkedAttrib != 0xFFFFFFFF)
+         linkedAttrib--; /* position no longer occupies a generic slot */
uint32_t extraAttribs = 0;
if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
Value *offset = NULL;
if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
bcolorAttrib = locate_linkage(
TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader);
+         if (bcolorAttrib != 0xFFFFFFFF)
+            bcolorAttrib--; /* guard the not-found sentinel */
/* Neither front nor back colors were available. Nothing to load. */
if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
continue;
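
Since locate_linkage() now returns the raw output index (position included),
callers that index generic attributes subtract one, but only when a linkage
was found, so the 0xFFFFFFFF sentinel check still holds. A worked example,
assuming VS outputs { POSITION, COLOR, BCOLOR } (illustrative only):

    // locate_linkage(TGSI_SEMANTIC_COLOR, 0, vs)  -> 1; linkedAttrib = 0
    // locate_linkage(TGSI_SEMANTIC_BCOLOR, 0, vs) -> 2; bcolorAttrib = 1
    // not found -> 0xFFFFFFFF, left untouched so the sentinel compare fires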